1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
4  * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
5  * Copyright (c) 2014      Research Organization for Information Science
6  *                         and Technology (RIST). All rights reserved.
7  * Copyright (c) 2014      Los Alamos National Security, LLC. All rights
8  *                         reserved.
9  * Copyright (c) 2017      IBM Corporation. All rights reserved.
10  * $COPYRIGHT$
11  *
12  * Additional copyrights may follow
13  *
14  * $HEADER$
15  */
16 
17 #include "ompi_config.h"
18 #ifdef HAVE_UNISTD_H
19 #include <unistd.h>
20 #endif
21 #include <sys/types.h>
22 #ifdef HAVE_SYS_MMAN_H
23 #include <sys/mman.h>
24 #endif
25 #include <fcntl.h>
26 #include <stdlib.h>
27 #include <assert.h>
28 
29 #include "ompi/constants.h"
30 
31 #include "ompi/mca/rte/rte.h"
32 
33 #include "netpatterns.h"
34 
35 /* setup recursive doubleing tree node */
36 
ompi_netpatterns_setup_recursive_knomial_allgather_tree_node(int num_nodes,int node_rank,int tree_order,int * hier_ranks,netpatterns_k_exchange_node_t * exchange_node)37 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_allgather_tree_node(
38         int num_nodes, int node_rank, int tree_order, int *hier_ranks,
39         netpatterns_k_exchange_node_t *exchange_node)
40 {
41     /* local variables */
42     int i, j, cnt, i_temp;
43     int knt,knt2,kk, ex_node, stray;
44     int n_levels,pow_k;
45     int k_temp1;
46     int k_temp2;
47     int myid, reindex_myid = 0;
48     int base, peer_base,base_temp;
49     int peer;
50     int *prev_data = NULL;
51     int *current_data = NULL;
52     int *group_info = NULL;
53 
54 
55     NETPATTERNS_VERBOSE(
56             ("Enter ompi_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
57                 num_nodes, node_rank, tree_order));
58 
59     assert(num_nodes > 1);
60     assert(tree_order > 1);
61     if (tree_order > num_nodes) {
62         tree_order = num_nodes;
63     }
64 
65     /* k-nomial radix */
66     exchange_node->tree_order = tree_order;
67 
68     /* Calculate the number of levels in the tree for
69      * the largest power of tree_order less than or
70      * equal to the group size
71      */
72     n_levels = 0;
73     cnt=1;
74     while ( num_nodes > cnt ) {
75         cnt *= tree_order;
76         n_levels++;
77     }
78     /* this is the actual number of recusive k-ing steps
79      * we will perform, the last step may not be a full
80      * step depending on the outcome of the next conditional
81      */
82     pow_k = n_levels;
83 
84     /* figure out the largest power of tree_order that is less than or equal to
85      * num_nodes */
86     if ( cnt > num_nodes) {
87         cnt /= tree_order;
88         n_levels--;
89     }
90 
91     /*exchange_node->log_tree_order = n_levels;*/
92     exchange_node->log_tree_order = pow_k;
93     exchange_node->n_largest_pow_tree_order = cnt;
94 
95 
96     /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
97     /* I don't think we need to cache this info this group_info array */
98     group_info = (int *) calloc(pow_k , sizeof(int));
99     group_info[0] = num_nodes/tree_order;
100     /*fprintf(stderr,"Number of complete groups of power 1 is %d\n",group_info[0]);*/
101     for ( i = 1; i < pow_k; i ++) {
102         group_info[i] = group_info[i-1]/tree_order;
103         /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
104 
105     }
106 
107     /* find number of incomplete groups and number of ranks belonging to those ranks */
108     knt=0;
109     while (knt <= (pow_k - 1) && group_info[knt] > 0) {
110         knt++;
111     }
112     knt--;
113     /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
114 
115     /* k_temp is a synonym for cnt which is the largest full power of k group */
116     /* now, start the calculation to find the first stray rank aka "extra" rank */
117     stray = 0;
118     /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt,
119                                                                            cnt*group_info[knt],
120                                                                            num_nodes - cnt*group_info[knt]);*/
121 
122 
123     /* cache this info, it's muy importante */
124     stray = cnt*group_info[knt];
125     exchange_node->k_nomial_stray = stray;
126 
127 
128 
129     /* before we do this, we need to first reindex */
130     /* reindexing phase */
131      /* this is the reindex phase */
132     exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
133     /* this is the inverse map */
134     exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
135     /*int reindex_myid;*/
136     /* reindex */
137     if( stray < num_nodes ) {
138         /* find the first proxy rank */
139         peer = stray - cnt;
140         /* fix all ranks prior to this rank */
141         for( i = 0; i < peer; i++){
142             exchange_node->reindex_map[i] = i;
143         }
144         /* now, start the swap */
145         exchange_node->reindex_map[peer] = peer;
146         for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
147             exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
148         }
149         i_temp = i;
150         for( i = i_temp; i < stray; i++) {
151             exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
152         }
153         /* now, finish it off */
154         exchange_node->reindex_map[stray] = peer + 1;
155         for( i = (stray+1); i < num_nodes; i++) {
156             exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
157         }
158         /* debug print */
159         /*
160         for( i = 0; i < np; i++){
161             fprintf(stderr,"%d ",reindex_map[i]);
162         }
163         fprintf(stderr,"\n");
164         */
165     } else {
166         /* we have no extras, trivial reindexing */
167         for( i = 0; i < num_nodes; i++){
168             exchange_node->reindex_map[i] = i;
169         }
170     }
171     /* finished reindexing */
172 
173     /* Now, I need to get my rank in the new indexing */
174     for( i = 0; i < num_nodes; i++ ){
175         if( node_rank == exchange_node->reindex_map[i] ){
176             exchange_node->reindex_myid = i;
177             break;
178         }
179     }
180     /* Now, let's compute the inverse mapping here */
181     for( i = 0; i < num_nodes; i++){
182         j = 0;
183         while(exchange_node->reindex_map[j] != i ){
184             j++;
185         }
186         exchange_node->inv_reindex_map[i] = j;
187     }
188 
189 
190     /* Now we get the data sizes we should expect at each level */
191     /* now get the size of the data I am to receive from each peer */
192     /*int **payload_info;*/
193     prev_data = (int *) malloc( num_nodes*sizeof(int) );
194     if( NULL == prev_data ) {
195         goto Error;
196     }
197 
198     current_data = (int *) malloc( num_nodes*sizeof(int) );
199     if( NULL == current_data ) {
200         goto Error;
201     }
202 
203 
204     exchange_node->payload_info = (netpatterns_payload_t **) malloc(sizeof(netpatterns_payload_t *)*pow_k);
205     if( NULL == exchange_node->payload_info) {
206         goto Error;
207     }
208 
209     for(i = 0; i < pow_k; i++){
210         exchange_node->payload_info[i] = (netpatterns_payload_t *) malloc(sizeof(netpatterns_payload_t)*(tree_order-1));
211         if( NULL == exchange_node->payload_info[i]) {
212             goto Error;
213         }
214 
215     }
216     /* intialize the payload array
217        This is the money struct, just need to initialize this with
218        the subgroup information */
219     /*
220     for(i = 0; i < num_nodes; i++){
221         prev_data[i] = 1;
222         current_data[i] = 1;
223     }
224     */
225 
226     for(i = 0; i < num_nodes; i++){
227         prev_data[i] = hier_ranks[i];
228         current_data[i] = hier_ranks[i];
229     }
230 
231     /* everyone will need to do this loop over all ranks
232      * Phase I calculate the contribution from the extra ranks
233      */
234     for( myid = 0; myid < num_nodes; myid++) {
235         /* get my new rank */
236         for( j = 0; j < num_nodes; j++ ){
237             /* this will be satisfied for one of the indices */
238             if( myid == exchange_node->reindex_map[j] ){
239                 reindex_myid = j;
240                 break;
241             }
242         }
243 
244         for( j = stray; j < num_nodes; j++) {
245             if(reindex_myid == ( j - cnt )) {
246                 /* then this is a proxy rank */
247                 prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
248                 break;
249             }
250 
251         }
252     }
253 
254     /* Phase II calculate the contribution from each recursive k - ing level
255      *
256      */
257     k_temp1 = tree_order; /* k^1 */
258     k_temp2 = 1;   /* k^0 */
259     peer_base = 0;
260     base_temp = 0;
261     for( i = 0; i < pow_k; i++) {
262         /* get my new rank */
263         for( myid = 0; myid < num_nodes; myid++){
264             current_data[myid] = prev_data[myid];
265             /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
266             for( j = 0; j < num_nodes; j++ ){
267                 if( myid == exchange_node->reindex_map[j] ){
268                     reindex_myid = j;
269                     break;
270                 }
271             }
272             if( reindex_myid < stray ) {
273                 /* now start the actual algorithm */
274                 FIND_BASE(base,reindex_myid,i+1,tree_order);
275                 for( j = 0; j < ( tree_order - 1 ); j ++ ) {
276                     peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
277                     if( peer < stray ) {
278                         /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
279                         /* then get the data */
280                         if( node_rank == myid ){
281                             exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
282                             /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
283                             if( i > 0 ) {
284 
285                                 /* find my len and offset */
286                                 FIND_BASE(peer_base,peer,i,tree_order);
287                                 /* I do not want to mess with this, but it seems that I have no choice */
288                                ex_node = exchange_node->reindex_map[peer_base];
289                                /* now, find out how far down the line this guy really is */
290                                knt2 =0;
291                                for(kk = 0; kk < ex_node; kk++){
292                                    knt2 += hier_ranks[kk];
293                                }
294                                 exchange_node->payload_info[i][j].r_offset = knt2;
295                                 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
296 
297                                 FIND_BASE(base_temp,reindex_myid,i,tree_order);
298                                 ex_node = exchange_node->reindex_map[base_temp];
299                                 knt2 = 0;
300                                 for( kk = 0; kk < ex_node; kk++){
301                                     knt2 += hier_ranks[kk];
302                                 }
303                                 exchange_node->payload_info[i][j].s_offset =
304                                                                   knt2; /* exchange_node->reindex_map[base_temp]; */
305                                 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
306                             } else {
307                                 ex_node = exchange_node->reindex_map[peer];
308                                 knt2 =0;
309                                 for(kk = 0; kk < ex_node; kk++){
310                                     knt2 += hier_ranks[kk];
311                                 }
312                                 exchange_node->payload_info[i][j].r_offset =
313                                     knt2; /*exchange_node->reindex_map[peer]; */
314                                 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
315                                 knt2 = 0;
316                                 for(kk = 0; kk < myid; kk++){
317                                     knt2 += hier_ranks[kk];
318                                 }
319                                 exchange_node->payload_info[i][j].s_offset = knt2;
320                                 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
321                             }
322                             /* how much I am to receive from this peer on this level */
323                             /* how much I am to send to this peer on this level */
324                             exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
325                             /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
326                             /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
327                                                                         prev_data[exchange_node->reindex_map[peer]],
328                                                                         exchange_node->reindex_map[peer], i+1);*/
329                             /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
330                                       exchange_node->reindex_map[peer],i+1);*/
331                         }
332 
333                         current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
334                     }
335                 }
336             }
337 
338 
339         }
340         k_temp1 *= tree_order;
341         k_temp2 *= tree_order;
342         /* debug print */
343        /* fprintf(stderr,"Level %d current data ",i+1);*/
344         for( j = 0; j < num_nodes; j++){
345            /* fprintf(stderr,"%d ",current_data[j]); */
346             prev_data[j] = current_data[j];
347         }
348        /* fprintf(stderr,"\n");*/
349 
350     }
351 
352 
353     /* this is the natural way to do recursive k-ing */
354     /* should never have more than one extra rank per proxy */
355     if( exchange_node->reindex_myid >= stray ){
356         /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
357         exchange_node->node_type = EXTRA_NODE;
358     } else {
359         exchange_node->node_type = EXCHANGE_NODE;
360     }
361 
362     /* set node characteristics - node that is not within the largest
363      * power of tree_order will just send its data to node that will participate
364      * in the recursive k-ing, and get the result back at the end.
365      * set the initial and final data exchanges - those that are not
366      * part of the recursive k-ing.
367      */
368     if (EXCHANGE_NODE == exchange_node->node_type)  {
369         exchange_node->n_extra_sources = 0;
370         for( i = stray; i < num_nodes; i++) {
371             if(exchange_node->reindex_myid == ( i - cnt )) {
372                 /* then I am a proxy rank and there is only a
373                  * single extra source
374                  */
375                 exchange_node->n_extra_sources = 1;
376                 break;
377             }
378         }
379 
380         if (exchange_node->n_extra_sources > 0) {
381             exchange_node->rank_extra_sources_array = (int *) malloc
382                 (exchange_node->n_extra_sources * sizeof(int));
383             if( NULL == exchange_node->rank_extra_sources_array ) {
384                 goto Error;
385             }
386             /* you broke above */
387             exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
388         } else {
389             exchange_node->rank_extra_sources_array = NULL;
390         }
391     } else {
392         /* I am an extra rank, find my proxy rank */
393         exchange_node->n_extra_sources = 1;
394 
395         exchange_node->rank_extra_sources_array = (int *) malloc
396             (exchange_node->n_extra_sources * sizeof(int));
397         if( NULL == exchange_node->rank_extra_sources_array ) {
398             goto Error;
399         }
400         exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
401     }
402 
403 
404     /* set the exchange pattern */
405     if (EXCHANGE_NODE == exchange_node->node_type) {
406         /* yep, that's right PLUS 1 */
407         exchange_node->n_exchanges = n_levels + 1;
408         /* initialize this */
409         exchange_node->n_actual_exchanges = 0;
410         /* Allocate 2 dimension array thak keeps
411          rank exchange information for each step*/
412         exchange_node->rank_exchanges = (int **) malloc
413             (exchange_node->n_exchanges * sizeof(int *));
414         if(NULL == exchange_node->rank_exchanges) {
415             goto Error;
416         }
417         for (i = 0; i < exchange_node->n_exchanges; i++) {
418             exchange_node->rank_exchanges[i] = (int *) malloc
419                 ((tree_order - 1) * sizeof(int));
420             if( NULL == exchange_node->rank_exchanges ) {
421                 goto Error;
422             }
423         }
424         k_temp1 = tree_order;
425         k_temp2 = 1;
426         /* fill in exchange partners */
427         /* Ok, now we start with the actual algorithm */
428         for( i = 0; i < exchange_node->n_exchanges; i ++) {
429             /*fprintf(stderr,"Starting Level %d\n",i+1);*/
430 
431             FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
432             /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
433             for( j = 0; j < (tree_order-1); j ++ ) {
434                 peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
435                 if ( peer < stray ) {
436                     exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
437                     /* an actual exchange occurs, bump the counter */
438 
439                 } else {
440                     /* out of range, skip it - do not bump the n_actual_exchanges counter */
441                     exchange_node->rank_exchanges[i][j] = -1;
442                 }
443 
444             }
445             k_temp1 *= tree_order;
446             k_temp2 *= tree_order;
447         }
448         for(i = 0; i < pow_k; i++){
449             for(j = 0; j < (tree_order-1); j++){
450                 if(-1 != exchange_node->rank_exchanges[i][j]){
451                     /* then bump the counter */
452                     exchange_node->n_actual_exchanges++;
453                 }
454             }
455         }
456 
457     } else {
458         /* we are extra ranks and we don't participate in the exchange :( */
459         exchange_node->n_exchanges=0;
460         exchange_node->rank_exchanges=NULL;
461     }
462 
463 
464     /* set the number of tags needed per stripe - this must be the
465      *   same across all procs in the communicator.
466      */
467     /* do we need this one */
468     exchange_node->n_tags = tree_order * n_levels + 1;
469 
470     free(prev_data);
471     free(current_data);
472     free(group_info);
473 
474     /* successful return */
475     return OMPI_SUCCESS;
476 
477 Error:
478 
479     if (NULL != exchange_node->rank_extra_sources_array) {
480         free(exchange_node->rank_extra_sources_array);
481     }
482 
483     if (NULL != exchange_node->rank_exchanges) {
484         for (i = 0; i < exchange_node->n_exchanges; i++) {
485             if (NULL != exchange_node->rank_exchanges[i]) {
486                 free(exchange_node->rank_exchanges[i]);
487             }
488         }
489         free(exchange_node->rank_exchanges);
490     }
491 
492     if (NULL != prev_data ){
493         free(prev_data);
494     }
495 
496     if(NULL != current_data) {
497         free(current_data);
498     }
499 
500     if(NULL != group_info) {
501         free(group_info);
502     }
503 
504     /* error return */
505     return OMPI_ERROR;
506 }
507 
ompi_netpatterns_cleanup_recursive_knomial_allgather_tree_node(netpatterns_k_exchange_node_t * exchange_node)508 OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_allgather_tree_node(
509         netpatterns_k_exchange_node_t *exchange_node)
510 {
511     int i;
512 
513     free(exchange_node->reindex_map);
514     free(exchange_node->inv_reindex_map);
515     if (exchange_node->n_extra_sources > 0) {
516         free(exchange_node->rank_extra_sources_array) ;
517         exchange_node->n_extra_sources = 0;
518         exchange_node->rank_extra_sources_array = NULL;
519     }
520     if (exchange_node->n_exchanges > 0) {
521         for (i=0; i < exchange_node->n_exchanges; i++) {
522             free(exchange_node->rank_exchanges[i]);
523             exchange_node->rank_exchanges[i] = NULL;
524         }
525         free(exchange_node->rank_exchanges);
526         exchange_node->rank_exchanges = NULL;
527         exchange_node->n_exchanges = 0;
528     }
529     for(i = 0; i < exchange_node->log_tree_order; i++){
530         free(exchange_node->payload_info[i]);
531     }
532     free(exchange_node->payload_info);
533 }
534 
ompi_netpatterns_setup_recursive_knomial_tree_node(int num_nodes,int node_rank,int tree_order,netpatterns_k_exchange_node_t * exchange_node)535 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_tree_node(
536         int num_nodes, int node_rank, int tree_order,
537         netpatterns_k_exchange_node_t *exchange_node)
538 {
539     /* local variables */
540     int i, j, tmp, cnt;
541     int n_levels;
542     int k_base, kpow_num, peer;
543 
544     NETPATTERNS_VERBOSE(
545             ("Enter ompi_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
546                 num_nodes, node_rank, tree_order));
547 
548     assert(num_nodes > 1);
549     assert(tree_order > 1);
550     if (tree_order > num_nodes) {
551         tree_order = num_nodes;
552     }
553 
554     exchange_node->tree_order = tree_order;
555 
556     /* figure out number of levels in the tree */
557     n_levels = 0;
558     /* cnt - number of ranks in given level */
559     cnt=1;
560     while ( num_nodes > cnt ) {
561         cnt *= tree_order;
562         n_levels++;
563     };
564 
565     /* figure out the largest power of tree_order that is less than or equal to
566      * num_nodes */
567     if ( cnt > num_nodes) {
568         cnt /= tree_order;
569         n_levels--;
570     }
571 
572     exchange_node->log_tree_order = n_levels;
573     exchange_node->n_largest_pow_tree_order = cnt;
574 
575     /* set node characteristics - node that is not within the largest
576      *  power of tree_order will just send it's data to node that will participate
577      *  in the recursive doubling, and get the result back at the end.
578      */
579     if (node_rank + 1 > cnt) {
580         exchange_node->node_type = EXTRA_NODE;
581     } else {
582         exchange_node->node_type = EXCHANGE_NODE;
583     }
584 
585 
586     /* set the initial and final data exchanges - those that are not
587      *   part of the recursive doubling.
588      */
589     if (EXCHANGE_NODE == exchange_node->node_type)  {
590         exchange_node->n_extra_sources = 0;
591         for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
592                 tmp < num_nodes && i < tree_order - 1;
593                 ++i, ++tmp) {
594             ++exchange_node->n_extra_sources;
595         }
596 
597         assert(exchange_node->n_extra_sources < tree_order);
598 
599         if (exchange_node->n_extra_sources > 0) {
600             exchange_node->rank_extra_sources_array = (int *) malloc
601                 (exchange_node->n_extra_sources * sizeof(int));
602             if( NULL == exchange_node->rank_extra_sources_array ) {
603                 goto Error;
604             }
605             for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
606                     i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
607                 NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
608                 exchange_node->rank_extra_sources_array[i] = tmp;
609             }
610         } else {
611             exchange_node->rank_extra_sources_array = NULL;
612         }
613     } else {
614         exchange_node->n_extra_sources = 1;
615         exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
616         if( NULL == exchange_node->rank_extra_sources_array ) {
617             goto Error;
618         }
619         exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
620         NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
621                     exchange_node->rank_extra_sources_array[0] ));
622     }
623 
624     /* set the exchange pattern */
625     if (EXCHANGE_NODE == exchange_node->node_type) {
626         exchange_node->n_exchanges = n_levels;
627         /* Allocate 2 dimension array thak keeps
628          rank exchange information for each step*/
629         exchange_node->rank_exchanges = (int **) malloc
630             (exchange_node->n_exchanges * sizeof(int *));
631         if(NULL == exchange_node->rank_exchanges) {
632             goto Error;
633         }
634         for (i = 0; i < exchange_node->n_exchanges; i++) {
635             exchange_node->rank_exchanges[i] = (int *) malloc
636                 ((tree_order - 1) * sizeof(int));
637             if( NULL == exchange_node->rank_exchanges ) {
638                 goto Error;
639             }
640         }
641         /* fill in exchange partners */
642         for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges;
643                                       i++, kpow_num *= tree_order) {
644             k_base = node_rank / (kpow_num * tree_order);
645             for(j = 1; j < tree_order; j++) {
646                 peer = node_rank + kpow_num * j;
647                 if (k_base != peer/(kpow_num * tree_order)) {
648                     /* Wraparound the number */
649                     peer = k_base * (kpow_num * tree_order)  +
650                         peer % (kpow_num * tree_order);
651                 }
652                 exchange_node->rank_exchanges[i][j - 1] = peer;
653                 NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d",
654                             i, j, tree_order, peer));
655             }
656         }
657     } else {
658         exchange_node->n_exchanges=0;
659         exchange_node->rank_exchanges=NULL;
660     }
661 
662     /* set the number of tags needed per stripe - this must be the
663      *   same across all procs in the communicator.
664      */
665     /* do we need this one */
666     exchange_node->n_tags = tree_order * n_levels + 1;
667 
668     /* successful return */
669     return OMPI_SUCCESS;
670 
671 Error:
672 
673     ompi_netpatterns_cleanup_recursive_knomial_tree_node (exchange_node);
674 
675     /* error return */
676     return OMPI_ERROR;
677 }
678 
ompi_netpatterns_cleanup_recursive_knomial_tree_node(netpatterns_k_exchange_node_t * exchange_node)679 OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_tree_node(
680         netpatterns_k_exchange_node_t *exchange_node)
681 {
682     int i;
683 
684     if (exchange_node->n_extra_sources > 0) {
685         free(exchange_node->rank_extra_sources_array);
686         exchange_node->rank_extra_sources_array = NULL;
687         exchange_node->n_extra_sources = 0;
688     }
689     if (exchange_node->n_exchanges > 0) {
690         for (i=0 ; i<exchange_node->n_exchanges; i++) {
691             free(exchange_node->rank_exchanges[i]);
692             exchange_node->rank_exchanges[i] = NULL;
693         }
694         free(exchange_node->rank_exchanges);
695         exchange_node->rank_exchanges = NULL;
696         exchange_node->n_exchanges = 0;
697     }
698 }
699 
700 #if 1
ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes,int node_rank,int tree_order,netpatterns_pair_exchange_node_t * exchange_node)701 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
702         netpatterns_pair_exchange_node_t *exchange_node)
703 {
704     /* local variables */
705     int i, tmp, cnt;
706     int n_levels;
707     int shift, mask;
708 
709     NETPATTERNS_VERBOSE(("Enter ompi_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
710 
711     assert(num_nodes > 1);
712     while (tree_order > num_nodes) {
713         tree_order /= 2;
714     }
715 
716     exchange_node->tree_order = tree_order;
717     /* We support only tree_order that are power of two */
718     assert(0 == (tree_order & (tree_order - 1)));
719 
720     /* figure out number of levels in the tree */
721     n_levels = 0;
722     /* cnt - number of ranks in given level */
723     cnt=1;
724     while ( num_nodes > cnt ) {
725         cnt *= tree_order;
726         n_levels++;
727     };
728 
729     /* figure out the largest power of tree_order that is less than or equal to
730      * num_nodes */
731     if ( cnt > num_nodes) {
732         cnt /= tree_order;
733         n_levels--;
734     }
735     exchange_node->log_tree_order = n_levels;
736     if (2 == tree_order) {
737         exchange_node->log_2 = exchange_node->log_tree_order;
738     }
739 
740     tmp=1;
741     for (i=0 ; i < n_levels ; i++ ) {
742         tmp *= tree_order;
743     }
744     /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
745     assert(tmp == cnt);
746 
747     exchange_node->n_largest_pow_tree_order = tmp;
748     if (2 == tree_order) {
749         exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
750     }
751 
752     /* set node characteristics - node that is not within the largest
753      *  power of tree_order will just send it's data to node that will participate
754      *  in the recursive doubling, and get the result back at the end.
755      */
756     if ( node_rank + 1 > cnt ) {
757         exchange_node->node_type = EXTRA_NODE;
758     } else {
759         exchange_node->node_type = EXCHANGE_NODE;
760     }
761 
762     /* set the initial and final data exchanges - those that are not
763      *   part of the recursive doubling.
764      */
765     if ( EXCHANGE_NODE == exchange_node->node_type ) {
766         exchange_node->n_extra_sources = 0;
767         for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
768             ++exchange_node->n_extra_sources;
769         }
770         if (exchange_node->n_extra_sources > 0) {
771             exchange_node->rank_extra_sources_array = (int *) malloc
772                 (exchange_node->n_extra_sources * sizeof(int));
773             if( NULL == exchange_node->rank_extra_sources_array ) {
774                 goto Error;
775             }
776             for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
777                 NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
778                 exchange_node->rank_extra_sources_array[i] = tmp;
779             }
780         } else {
781             exchange_node->rank_extra_sources_array = NULL;
782         }
783     } else {
784         exchange_node->n_extra_sources = 1;
785         exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
786         if( NULL == exchange_node->rank_extra_sources_array ) {
787             goto Error;
788         }
789         exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
790         NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
791     }
792 
793     /* Ishai: To be compatable with the old structure - should be remoived later */
794     if (1 == exchange_node->n_extra_sources) {
795         exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
796     } else {
797         exchange_node->rank_extra_source = -1;
798     }
799 
800     /* set the exchange pattern */
801     if ( EXCHANGE_NODE == exchange_node->node_type ) {
802         exchange_node->n_exchanges = n_levels * (tree_order - 1);
803         exchange_node->rank_exchanges = (int *) malloc
804             (exchange_node->n_exchanges * sizeof(int));
805         if( NULL == exchange_node->rank_exchanges ) {
806             goto Error;
807         }
808 
809         /* fill in exchange partners */
810         for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
811             for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
812                 exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
813                 NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
814             }
815         }
816 
817     } else {
818 
819         exchange_node->n_exchanges=0;
820         exchange_node->rank_exchanges=NULL;
821 
822     }
823 
824     /* set the number of tags needed per stripe - this must be the
825      *   same across all procs in the communicator.
826      */
827     /* Ishai: Need to find out what is n_tags */
828     exchange_node->n_tags = tree_order * n_levels + 1;
829 
830     /* successful return */
831     return OMPI_SUCCESS;
832 
833 Error:
834     if (exchange_node->rank_extra_sources_array != NULL) {
835         free(exchange_node->rank_extra_sources_array);
836     }
837 
838     /* error return */
839     return OMPI_ERROR;
840 }
841 
ompi_netpatterns_cleanup_recursive_doubling_tree_node(netpatterns_pair_exchange_node_t * exchange_node)842 OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_doubling_tree_node(
843     netpatterns_pair_exchange_node_t *exchange_node)
844 {
845     NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
846     if (exchange_node->rank_extra_sources_array != NULL) {
847         free(exchange_node->rank_extra_sources_array);
848     }
849 
850     if (exchange_node->rank_exchanges != NULL) {
851         free(exchange_node->rank_exchanges);
852     }
853 }
854 #endif
855 
ompi_netpatterns_setup_recursive_doubling_tree_node(int num_nodes,int node_rank,netpatterns_pair_exchange_node_t * exchange_node)856 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
857         netpatterns_pair_exchange_node_t *exchange_node)
858 {
859     return ompi_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node);
860 }
861 
862 #if 0
863 /*OMPI_DECLSPEC int old_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
864 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
865         netpatterns_pair_exchange_node_t *exchange_node)
866 {
867     /* local variables */
868     /*int tree_order;*/
869     int i,tmp,cnt,result,n_extra_nodes;
870     int n_exchanges;
871 
872     /* figure out number of levels in the tree */
873 
874     n_exchanges=0;
875     result=num_nodes;
876 /*    tree_order=2;*/
877     /* cnt - number of ranks in given level */
878     cnt=1;
879     while( num_nodes > cnt ) {
880         cnt*=tree_order;
881         n_exchanges++;
882     };
883 
884     /* figure out the largest power of 2 that is less than or equal to
885      * num_nodes */
886     if( cnt > num_nodes) {
887         cnt/=tree_order;
888         n_exchanges--;
889     }
890     exchange_node->log_2=n_exchanges;
891 
892     tmp=1;
893     for(i=0 ; i < n_exchanges ; i++ ) {
894         tmp*=2;
895     }
896     exchange_node->n_largest_pow_2=tmp;
897 
898     /* set node characteristics - node that is not within the largest
899      *  power of 2 will just send it's data to node that will participate
900      *  in the recursive doubling, and get the result back at the end.
901      */
902     if( node_rank+1 > cnt ) {
903         exchange_node->node_type=EXTRA_NODE;
904     } else {
905         exchange_node->node_type=EXCHANGE_NODE;
906     }
907 
908     /* set the initial and final data exchanges - those that are not
909      *   part of the recursive doubling.
910      */
911     n_extra_nodes=num_nodes-cnt;
912 
913     if ( EXCHANGE_NODE == exchange_node->node_type ) {
914 
915         if( node_rank < n_extra_nodes ) {
916             exchange_node->n_extra_sources=1;
917             exchange_node->rank_extra_source=cnt+node_rank;
918         } else {
919             exchange_node->n_extra_sources=0;
920             exchange_node->rank_extra_source=-1;
921         }
922 
923     } else {
924             exchange_node->n_extra_sources=1;
925             exchange_node->rank_extra_source=node_rank-cnt;
926     }
927 
928     /* set the exchange pattern */
929     if( EXCHANGE_NODE == exchange_node->node_type ) {
930 
931         exchange_node->n_exchanges=n_exchanges;
932         exchange_node->rank_exchanges=(int *) malloc
933             (n_exchanges*sizeof(int));
934         if( NULL == exchange_node->rank_exchanges ) {
935             goto Error;
936         }
937 
938         /* fill in exchange partners */
939         result=1;
940         tmp=node_rank;
941         for( i=0 ; i < n_exchanges ; i++ ) {
942             if(tmp & 1 ) {
943                 exchange_node->rank_exchanges[i]=
944                     node_rank-result;
945             } else {
946                 exchange_node->rank_exchanges[i]=
947                     node_rank+result;
948             }
949             result*=2;
950             tmp/=2;
951         }
952 
953     } else {
954 
955         exchange_node->n_exchanges=0;
956         exchange_node->rank_exchanges=NULL;
957 
958     }
959 
960     /* set the number of tags needed per stripe - this must be the
961      *   same across all procs in the communicator.
962      */
963     exchange_node->n_tags=2*n_exchanges+1;
964 
965     /* Ishai: to make sure free will work also for people that call this function */
966     exchange_node->rank_extra_sources_array = NULL;
967 
968     /* successful return */
969     return OMPI_SUCCESS;
970 
971 Error:
972 
973     /* error return */
974     return OMPI_ERROR;
975 }
976 #endif
977 
978