1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
5 * Copyright (c) 2014 Research Organization for Information Science
6 * and Technology (RIST). All rights reserved.
7 * Copyright (c) 2014 Los Alamos National Security, LLC. All rights
8 * reserved.
9 * Copyright (c) 2017 IBM Corporation. All rights reserved.
10 * $COPYRIGHT$
11 *
12 * Additional copyrights may follow
13 *
14 * $HEADER$
15 */
16
17 #include "ompi_config.h"
18 #ifdef HAVE_UNISTD_H
19 #include <unistd.h>
20 #endif
21 #include <sys/types.h>
22 #ifdef HAVE_SYS_MMAN_H
23 #include <sys/mman.h>
24 #endif
25 #include <fcntl.h>
26 #include <stdlib.h>
27 #include <assert.h>
28
29 #include "ompi/constants.h"
30
31 #include "ompi/mca/rte/rte.h"
32
33 #include "netpatterns.h"
34
35 /* setup recursive doubleing tree node */
36
ompi_netpatterns_setup_recursive_knomial_allgather_tree_node(int num_nodes,int node_rank,int tree_order,int * hier_ranks,netpatterns_k_exchange_node_t * exchange_node)37 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_allgather_tree_node(
38 int num_nodes, int node_rank, int tree_order, int *hier_ranks,
39 netpatterns_k_exchange_node_t *exchange_node)
40 {
41 /* local variables */
42 int i, j, cnt, i_temp;
43 int knt,knt2,kk, ex_node, stray;
44 int n_levels,pow_k;
45 int k_temp1;
46 int k_temp2;
47 int myid, reindex_myid = 0;
48 int base, peer_base,base_temp;
49 int peer;
50 int *prev_data = NULL;
51 int *current_data = NULL;
52 int *group_info = NULL;
53
54
55 NETPATTERNS_VERBOSE(
56 ("Enter ompi_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
57 num_nodes, node_rank, tree_order));
58
59 assert(num_nodes > 1);
60 assert(tree_order > 1);
61 if (tree_order > num_nodes) {
62 tree_order = num_nodes;
63 }
64
65 /* k-nomial radix */
66 exchange_node->tree_order = tree_order;
67
68 /* Calculate the number of levels in the tree for
69 * the largest power of tree_order less than or
70 * equal to the group size
71 */
72 n_levels = 0;
73 cnt=1;
74 while ( num_nodes > cnt ) {
75 cnt *= tree_order;
76 n_levels++;
77 }
78 /* this is the actual number of recusive k-ing steps
79 * we will perform, the last step may not be a full
80 * step depending on the outcome of the next conditional
81 */
82 pow_k = n_levels;
83
84 /* figure out the largest power of tree_order that is less than or equal to
85 * num_nodes */
86 if ( cnt > num_nodes) {
87 cnt /= tree_order;
88 n_levels--;
89 }
90
91 /*exchange_node->log_tree_order = n_levels;*/
92 exchange_node->log_tree_order = pow_k;
93 exchange_node->n_largest_pow_tree_order = cnt;
94
95
96 /* find the number of complete groups of size tree_order, tree_order^2, tree_order^3,...,tree_order^pow_k */
97 /* I don't think we need to cache this info this group_info array */
98 group_info = (int *) calloc(pow_k , sizeof(int));
99 group_info[0] = num_nodes/tree_order;
100 /*fprintf(stderr,"Number of complete groups of power 1 is %d\n",group_info[0]);*/
101 for ( i = 1; i < pow_k; i ++) {
102 group_info[i] = group_info[i-1]/tree_order;
103 /*fprintf(stderr,"Number of complete groups of power %d is %d\n",i+1,group_info[i]);*/
104
105 }
106
107 /* find number of incomplete groups and number of ranks belonging to those ranks */
108 knt=0;
109 while (knt <= (pow_k - 1) && group_info[knt] > 0) {
110 knt++;
111 }
112 knt--;
113 /*fprintf(stderr,"Maximal power of k is %d and the number of incomplete groups is %d \n", knt+1 ,tree_order - group_info[knt] );*/
114
115 /* k_temp is a synonym for cnt which is the largest full power of k group */
116 /* now, start the calculation to find the first stray rank aka "extra" rank */
117 stray = 0;
118 /*fprintf(stderr,"Maximal power of k %d, first stragler rank is %d and the number of straglers is %d\n",cnt,
119 cnt*group_info[knt],
120 num_nodes - cnt*group_info[knt]);*/
121
122
123 /* cache this info, it's muy importante */
124 stray = cnt*group_info[knt];
125 exchange_node->k_nomial_stray = stray;
126
127
128
129 /* before we do this, we need to first reindex */
130 /* reindexing phase */
131 /* this is the reindex phase */
132 exchange_node->reindex_map = (int *) malloc(num_nodes*sizeof(int));
133 /* this is the inverse map */
134 exchange_node->inv_reindex_map = (int *) malloc(num_nodes*sizeof(int));
135 /*int reindex_myid;*/
136 /* reindex */
137 if( stray < num_nodes ) {
138 /* find the first proxy rank */
139 peer = stray - cnt;
140 /* fix all ranks prior to this rank */
141 for( i = 0; i < peer; i++){
142 exchange_node->reindex_map[i] = i;
143 }
144 /* now, start the swap */
145 exchange_node->reindex_map[peer] = peer;
146 for( i = (peer+1); i < (peer + (num_nodes - stray)+1); i++) {
147 exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
148 }
149 i_temp = i;
150 for( i = i_temp; i < stray; i++) {
151 exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 1;
152 }
153 /* now, finish it off */
154 exchange_node->reindex_map[stray] = peer + 1;
155 for( i = (stray+1); i < num_nodes; i++) {
156 exchange_node->reindex_map[i] = exchange_node->reindex_map[i-1] + 2;
157 }
158 /* debug print */
159 /*
160 for( i = 0; i < np; i++){
161 fprintf(stderr,"%d ",reindex_map[i]);
162 }
163 fprintf(stderr,"\n");
164 */
165 } else {
166 /* we have no extras, trivial reindexing */
167 for( i = 0; i < num_nodes; i++){
168 exchange_node->reindex_map[i] = i;
169 }
170 }
171 /* finished reindexing */
172
173 /* Now, I need to get my rank in the new indexing */
174 for( i = 0; i < num_nodes; i++ ){
175 if( node_rank == exchange_node->reindex_map[i] ){
176 exchange_node->reindex_myid = i;
177 break;
178 }
179 }
180 /* Now, let's compute the inverse mapping here */
181 for( i = 0; i < num_nodes; i++){
182 j = 0;
183 while(exchange_node->reindex_map[j] != i ){
184 j++;
185 }
186 exchange_node->inv_reindex_map[i] = j;
187 }
188
189
190 /* Now we get the data sizes we should expect at each level */
191 /* now get the size of the data I am to receive from each peer */
192 /*int **payload_info;*/
193 prev_data = (int *) malloc( num_nodes*sizeof(int) );
194 if( NULL == prev_data ) {
195 goto Error;
196 }
197
198 current_data = (int *) malloc( num_nodes*sizeof(int) );
199 if( NULL == current_data ) {
200 goto Error;
201 }
202
203
204 exchange_node->payload_info = (netpatterns_payload_t **) malloc(sizeof(netpatterns_payload_t *)*pow_k);
205 if( NULL == exchange_node->payload_info) {
206 goto Error;
207 }
208
209 for(i = 0; i < pow_k; i++){
210 exchange_node->payload_info[i] = (netpatterns_payload_t *) malloc(sizeof(netpatterns_payload_t)*(tree_order-1));
211 if( NULL == exchange_node->payload_info[i]) {
212 goto Error;
213 }
214
215 }
216 /* intialize the payload array
217 This is the money struct, just need to initialize this with
218 the subgroup information */
219 /*
220 for(i = 0; i < num_nodes; i++){
221 prev_data[i] = 1;
222 current_data[i] = 1;
223 }
224 */
225
226 for(i = 0; i < num_nodes; i++){
227 prev_data[i] = hier_ranks[i];
228 current_data[i] = hier_ranks[i];
229 }
230
231 /* everyone will need to do this loop over all ranks
232 * Phase I calculate the contribution from the extra ranks
233 */
234 for( myid = 0; myid < num_nodes; myid++) {
235 /* get my new rank */
236 for( j = 0; j < num_nodes; j++ ){
237 /* this will be satisfied for one of the indices */
238 if( myid == exchange_node->reindex_map[j] ){
239 reindex_myid = j;
240 break;
241 }
242 }
243
244 for( j = stray; j < num_nodes; j++) {
245 if(reindex_myid == ( j - cnt )) {
246 /* then this is a proxy rank */
247 prev_data[myid] += prev_data[exchange_node->reindex_map[j]];
248 break;
249 }
250
251 }
252 }
253
254 /* Phase II calculate the contribution from each recursive k - ing level
255 *
256 */
257 k_temp1 = tree_order; /* k^1 */
258 k_temp2 = 1; /* k^0 */
259 peer_base = 0;
260 base_temp = 0;
261 for( i = 0; i < pow_k; i++) {
262 /* get my new rank */
263 for( myid = 0; myid < num_nodes; myid++){
264 current_data[myid] = prev_data[myid];
265 /*fprintf(stderr,"my current data at level %d is %d\n",i+1,current_data[myid]);*/
266 for( j = 0; j < num_nodes; j++ ){
267 if( myid == exchange_node->reindex_map[j] ){
268 reindex_myid = j;
269 break;
270 }
271 }
272 if( reindex_myid < stray ) {
273 /* now start the actual algorithm */
274 FIND_BASE(base,reindex_myid,i+1,tree_order);
275 for( j = 0; j < ( tree_order - 1 ); j ++ ) {
276 peer = base + (reindex_myid + k_temp2*(j+1))%k_temp1;
277 if( peer < stray ) {
278 /*fprintf(stderr,"getting %d bytes \n",prev_data[reindex_map[peer]]);*/
279 /* then get the data */
280 if( node_rank == myid ){
281 exchange_node->payload_info[i][j].r_len = prev_data[exchange_node->reindex_map[peer]];
282 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_len %d\n",i,j,prev_data[exchange_node->reindex_map[peer]]);*/
283 if( i > 0 ) {
284
285 /* find my len and offset */
286 FIND_BASE(peer_base,peer,i,tree_order);
287 /* I do not want to mess with this, but it seems that I have no choice */
288 ex_node = exchange_node->reindex_map[peer_base];
289 /* now, find out how far down the line this guy really is */
290 knt2 =0;
291 for(kk = 0; kk < ex_node; kk++){
292 knt2 += hier_ranks[kk];
293 }
294 exchange_node->payload_info[i][j].r_offset = knt2;
295 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
296
297 FIND_BASE(base_temp,reindex_myid,i,tree_order);
298 ex_node = exchange_node->reindex_map[base_temp];
299 knt2 = 0;
300 for( kk = 0; kk < ex_node; kk++){
301 knt2 += hier_ranks[kk];
302 }
303 exchange_node->payload_info[i][j].s_offset =
304 knt2; /* exchange_node->reindex_map[base_temp]; */
305 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j,exchange_node->payload_info[i][j].s_offset);*/
306 } else {
307 ex_node = exchange_node->reindex_map[peer];
308 knt2 =0;
309 for(kk = 0; kk < ex_node; kk++){
310 knt2 += hier_ranks[kk];
311 }
312 exchange_node->payload_info[i][j].r_offset =
313 knt2; /*exchange_node->reindex_map[peer]; */
314 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].r_offset %d\n",i,j,exchange_node->payload_info[i][j].r_offset);*/
315 knt2 = 0;
316 for(kk = 0; kk < myid; kk++){
317 knt2 += hier_ranks[kk];
318 }
319 exchange_node->payload_info[i][j].s_offset = knt2;
320 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_offset %d\n",i,j, exchange_node->payload_info[i][j].s_offset);*/
321 }
322 /* how much I am to receive from this peer on this level */
323 /* how much I am to send to this peer on this level */
324 exchange_node->payload_info[i][j].s_len = prev_data[node_rank];
325 /*fprintf(stderr,"exchange_node->payload_info[%d][%d].s_len %d\n",i,j,prev_data[node_rank]);*/
326 /*fprintf(stderr,"I am rank %d receiveing %d bytes from rank %d at level %d\n",node_rank,
327 prev_data[exchange_node->reindex_map[peer]],
328 exchange_node->reindex_map[peer], i+1);*/
329 /*fprintf(stderr,"I am rank %d sending %d bytes to rank %d at level %d\n",node_rank,prev_data[myid],
330 exchange_node->reindex_map[peer],i+1);*/
331 }
332
333 current_data[myid] += prev_data[exchange_node->reindex_map[peer]];
334 }
335 }
336 }
337
338
339 }
340 k_temp1 *= tree_order;
341 k_temp2 *= tree_order;
342 /* debug print */
343 /* fprintf(stderr,"Level %d current data ",i+1);*/
344 for( j = 0; j < num_nodes; j++){
345 /* fprintf(stderr,"%d ",current_data[j]); */
346 prev_data[j] = current_data[j];
347 }
348 /* fprintf(stderr,"\n");*/
349
350 }
351
352
353 /* this is the natural way to do recursive k-ing */
354 /* should never have more than one extra rank per proxy */
355 if( exchange_node->reindex_myid >= stray ){
356 /*fprintf(stderr,"Rank %d is mapped onto proxy rank %d \n",exchange_node->reindex_myid,exchange_node->reindex_myid - cnt);*/
357 exchange_node->node_type = EXTRA_NODE;
358 } else {
359 exchange_node->node_type = EXCHANGE_NODE;
360 }
361
362 /* set node characteristics - node that is not within the largest
363 * power of tree_order will just send its data to node that will participate
364 * in the recursive k-ing, and get the result back at the end.
365 * set the initial and final data exchanges - those that are not
366 * part of the recursive k-ing.
367 */
368 if (EXCHANGE_NODE == exchange_node->node_type) {
369 exchange_node->n_extra_sources = 0;
370 for( i = stray; i < num_nodes; i++) {
371 if(exchange_node->reindex_myid == ( i - cnt )) {
372 /* then I am a proxy rank and there is only a
373 * single extra source
374 */
375 exchange_node->n_extra_sources = 1;
376 break;
377 }
378 }
379
380 if (exchange_node->n_extra_sources > 0) {
381 exchange_node->rank_extra_sources_array = (int *) malloc
382 (exchange_node->n_extra_sources * sizeof(int));
383 if( NULL == exchange_node->rank_extra_sources_array ) {
384 goto Error;
385 }
386 /* you broke above */
387 exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[i];
388 } else {
389 exchange_node->rank_extra_sources_array = NULL;
390 }
391 } else {
392 /* I am an extra rank, find my proxy rank */
393 exchange_node->n_extra_sources = 1;
394
395 exchange_node->rank_extra_sources_array = (int *) malloc
396 (exchange_node->n_extra_sources * sizeof(int));
397 if( NULL == exchange_node->rank_extra_sources_array ) {
398 goto Error;
399 }
400 exchange_node->rank_extra_sources_array[0] = exchange_node->reindex_map[exchange_node->reindex_myid - cnt];
401 }
402
403
404 /* set the exchange pattern */
405 if (EXCHANGE_NODE == exchange_node->node_type) {
406 /* yep, that's right PLUS 1 */
407 exchange_node->n_exchanges = n_levels + 1;
408 /* initialize this */
409 exchange_node->n_actual_exchanges = 0;
410 /* Allocate 2 dimension array thak keeps
411 rank exchange information for each step*/
412 exchange_node->rank_exchanges = (int **) malloc
413 (exchange_node->n_exchanges * sizeof(int *));
414 if(NULL == exchange_node->rank_exchanges) {
415 goto Error;
416 }
417 for (i = 0; i < exchange_node->n_exchanges; i++) {
418 exchange_node->rank_exchanges[i] = (int *) malloc
419 ((tree_order - 1) * sizeof(int));
420 if( NULL == exchange_node->rank_exchanges ) {
421 goto Error;
422 }
423 }
424 k_temp1 = tree_order;
425 k_temp2 = 1;
426 /* fill in exchange partners */
427 /* Ok, now we start with the actual algorithm */
428 for( i = 0; i < exchange_node->n_exchanges; i ++) {
429 /*fprintf(stderr,"Starting Level %d\n",i+1);*/
430
431 FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order);
432 /*fprintf(stderr,"Myid %d base %d\n",node_rank,base);*/
433 for( j = 0; j < (tree_order-1); j ++ ) {
434 peer = base + (exchange_node->reindex_myid + k_temp2*(j+1))%k_temp1;
435 if ( peer < stray ) {
436 exchange_node->rank_exchanges[i][j] = exchange_node->reindex_map[peer];
437 /* an actual exchange occurs, bump the counter */
438
439 } else {
440 /* out of range, skip it - do not bump the n_actual_exchanges counter */
441 exchange_node->rank_exchanges[i][j] = -1;
442 }
443
444 }
445 k_temp1 *= tree_order;
446 k_temp2 *= tree_order;
447 }
448 for(i = 0; i < pow_k; i++){
449 for(j = 0; j < (tree_order-1); j++){
450 if(-1 != exchange_node->rank_exchanges[i][j]){
451 /* then bump the counter */
452 exchange_node->n_actual_exchanges++;
453 }
454 }
455 }
456
457 } else {
458 /* we are extra ranks and we don't participate in the exchange :( */
459 exchange_node->n_exchanges=0;
460 exchange_node->rank_exchanges=NULL;
461 }
462
463
464 /* set the number of tags needed per stripe - this must be the
465 * same across all procs in the communicator.
466 */
467 /* do we need this one */
468 exchange_node->n_tags = tree_order * n_levels + 1;
469
470 free(prev_data);
471 free(current_data);
472 free(group_info);
473
474 /* successful return */
475 return OMPI_SUCCESS;
476
477 Error:
478
479 if (NULL != exchange_node->rank_extra_sources_array) {
480 free(exchange_node->rank_extra_sources_array);
481 }
482
483 if (NULL != exchange_node->rank_exchanges) {
484 for (i = 0; i < exchange_node->n_exchanges; i++) {
485 if (NULL != exchange_node->rank_exchanges[i]) {
486 free(exchange_node->rank_exchanges[i]);
487 }
488 }
489 free(exchange_node->rank_exchanges);
490 }
491
492 if (NULL != prev_data ){
493 free(prev_data);
494 }
495
496 if(NULL != current_data) {
497 free(current_data);
498 }
499
500 if(NULL != group_info) {
501 free(group_info);
502 }
503
504 /* error return */
505 return OMPI_ERROR;
506 }
507
ompi_netpatterns_cleanup_recursive_knomial_allgather_tree_node(netpatterns_k_exchange_node_t * exchange_node)508 OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_allgather_tree_node(
509 netpatterns_k_exchange_node_t *exchange_node)
510 {
511 int i;
512
513 free(exchange_node->reindex_map);
514 free(exchange_node->inv_reindex_map);
515 if (exchange_node->n_extra_sources > 0) {
516 free(exchange_node->rank_extra_sources_array) ;
517 exchange_node->n_extra_sources = 0;
518 exchange_node->rank_extra_sources_array = NULL;
519 }
520 if (exchange_node->n_exchanges > 0) {
521 for (i=0; i < exchange_node->n_exchanges; i++) {
522 free(exchange_node->rank_exchanges[i]);
523 exchange_node->rank_exchanges[i] = NULL;
524 }
525 free(exchange_node->rank_exchanges);
526 exchange_node->rank_exchanges = NULL;
527 exchange_node->n_exchanges = 0;
528 }
529 for(i = 0; i < exchange_node->log_tree_order; i++){
530 free(exchange_node->payload_info[i]);
531 }
532 free(exchange_node->payload_info);
533 }
534
ompi_netpatterns_setup_recursive_knomial_tree_node(int num_nodes,int node_rank,int tree_order,netpatterns_k_exchange_node_t * exchange_node)535 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_knomial_tree_node(
536 int num_nodes, int node_rank, int tree_order,
537 netpatterns_k_exchange_node_t *exchange_node)
538 {
539 /* local variables */
540 int i, j, tmp, cnt;
541 int n_levels;
542 int k_base, kpow_num, peer;
543
544 NETPATTERNS_VERBOSE(
545 ("Enter ompi_netpatterns_setup_recursive_knomial_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)",
546 num_nodes, node_rank, tree_order));
547
548 assert(num_nodes > 1);
549 assert(tree_order > 1);
550 if (tree_order > num_nodes) {
551 tree_order = num_nodes;
552 }
553
554 exchange_node->tree_order = tree_order;
555
556 /* figure out number of levels in the tree */
557 n_levels = 0;
558 /* cnt - number of ranks in given level */
559 cnt=1;
560 while ( num_nodes > cnt ) {
561 cnt *= tree_order;
562 n_levels++;
563 };
564
565 /* figure out the largest power of tree_order that is less than or equal to
566 * num_nodes */
567 if ( cnt > num_nodes) {
568 cnt /= tree_order;
569 n_levels--;
570 }
571
572 exchange_node->log_tree_order = n_levels;
573 exchange_node->n_largest_pow_tree_order = cnt;
574
575 /* set node characteristics - node that is not within the largest
576 * power of tree_order will just send it's data to node that will participate
577 * in the recursive doubling, and get the result back at the end.
578 */
579 if (node_rank + 1 > cnt) {
580 exchange_node->node_type = EXTRA_NODE;
581 } else {
582 exchange_node->node_type = EXCHANGE_NODE;
583 }
584
585
586 /* set the initial and final data exchanges - those that are not
587 * part of the recursive doubling.
588 */
589 if (EXCHANGE_NODE == exchange_node->node_type) {
590 exchange_node->n_extra_sources = 0;
591 for (i = 0, tmp = node_rank * (tree_order - 1) + cnt + i;
592 tmp < num_nodes && i < tree_order - 1;
593 ++i, ++tmp) {
594 ++exchange_node->n_extra_sources;
595 }
596
597 assert(exchange_node->n_extra_sources < tree_order);
598
599 if (exchange_node->n_extra_sources > 0) {
600 exchange_node->rank_extra_sources_array = (int *) malloc
601 (exchange_node->n_extra_sources * sizeof(int));
602 if( NULL == exchange_node->rank_extra_sources_array ) {
603 goto Error;
604 }
605 for (i = 0, tmp = node_rank * (tree_order - 1) + cnt;
606 i < tree_order - 1 && tmp < num_nodes; ++i, ++tmp) {
607 NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
608 exchange_node->rank_extra_sources_array[i] = tmp;
609 }
610 } else {
611 exchange_node->rank_extra_sources_array = NULL;
612 }
613 } else {
614 exchange_node->n_extra_sources = 1;
615 exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
616 if( NULL == exchange_node->rank_extra_sources_array ) {
617 goto Error;
618 }
619 exchange_node->rank_extra_sources_array[0] = (node_rank - cnt) / (tree_order - 1);
620 NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0,
621 exchange_node->rank_extra_sources_array[0] ));
622 }
623
624 /* set the exchange pattern */
625 if (EXCHANGE_NODE == exchange_node->node_type) {
626 exchange_node->n_exchanges = n_levels;
627 /* Allocate 2 dimension array thak keeps
628 rank exchange information for each step*/
629 exchange_node->rank_exchanges = (int **) malloc
630 (exchange_node->n_exchanges * sizeof(int *));
631 if(NULL == exchange_node->rank_exchanges) {
632 goto Error;
633 }
634 for (i = 0; i < exchange_node->n_exchanges; i++) {
635 exchange_node->rank_exchanges[i] = (int *) malloc
636 ((tree_order - 1) * sizeof(int));
637 if( NULL == exchange_node->rank_exchanges ) {
638 goto Error;
639 }
640 }
641 /* fill in exchange partners */
642 for(i = 0, kpow_num = 1; i < exchange_node->n_exchanges;
643 i++, kpow_num *= tree_order) {
644 k_base = node_rank / (kpow_num * tree_order);
645 for(j = 1; j < tree_order; j++) {
646 peer = node_rank + kpow_num * j;
647 if (k_base != peer/(kpow_num * tree_order)) {
648 /* Wraparound the number */
649 peer = k_base * (kpow_num * tree_order) +
650 peer % (kpow_num * tree_order);
651 }
652 exchange_node->rank_exchanges[i][j - 1] = peer;
653 NETPATTERNS_VERBOSE(("rank_exchanges#(%d,%d)/%d = %d",
654 i, j, tree_order, peer));
655 }
656 }
657 } else {
658 exchange_node->n_exchanges=0;
659 exchange_node->rank_exchanges=NULL;
660 }
661
662 /* set the number of tags needed per stripe - this must be the
663 * same across all procs in the communicator.
664 */
665 /* do we need this one */
666 exchange_node->n_tags = tree_order * n_levels + 1;
667
668 /* successful return */
669 return OMPI_SUCCESS;
670
671 Error:
672
673 ompi_netpatterns_cleanup_recursive_knomial_tree_node (exchange_node);
674
675 /* error return */
676 return OMPI_ERROR;
677 }
678
ompi_netpatterns_cleanup_recursive_knomial_tree_node(netpatterns_k_exchange_node_t * exchange_node)679 OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_knomial_tree_node(
680 netpatterns_k_exchange_node_t *exchange_node)
681 {
682 int i;
683
684 if (exchange_node->n_extra_sources > 0) {
685 free(exchange_node->rank_extra_sources_array);
686 exchange_node->rank_extra_sources_array = NULL;
687 exchange_node->n_extra_sources = 0;
688 }
689 if (exchange_node->n_exchanges > 0) {
690 for (i=0 ; i<exchange_node->n_exchanges; i++) {
691 free(exchange_node->rank_exchanges[i]);
692 exchange_node->rank_exchanges[i] = NULL;
693 }
694 free(exchange_node->rank_exchanges);
695 exchange_node->rank_exchanges = NULL;
696 exchange_node->n_exchanges = 0;
697 }
698 }
699
700 #if 1
ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes,int node_rank,int tree_order,netpatterns_pair_exchange_node_t * exchange_node)701 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank, int tree_order,
702 netpatterns_pair_exchange_node_t *exchange_node)
703 {
704 /* local variables */
705 int i, tmp, cnt;
706 int n_levels;
707 int shift, mask;
708
709 NETPATTERNS_VERBOSE(("Enter ompi_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes=%d, node_rank=%d, tree_order=%d)", num_nodes, node_rank, tree_order));
710
711 assert(num_nodes > 1);
712 while (tree_order > num_nodes) {
713 tree_order /= 2;
714 }
715
716 exchange_node->tree_order = tree_order;
717 /* We support only tree_order that are power of two */
718 assert(0 == (tree_order & (tree_order - 1)));
719
720 /* figure out number of levels in the tree */
721 n_levels = 0;
722 /* cnt - number of ranks in given level */
723 cnt=1;
724 while ( num_nodes > cnt ) {
725 cnt *= tree_order;
726 n_levels++;
727 };
728
729 /* figure out the largest power of tree_order that is less than or equal to
730 * num_nodes */
731 if ( cnt > num_nodes) {
732 cnt /= tree_order;
733 n_levels--;
734 }
735 exchange_node->log_tree_order = n_levels;
736 if (2 == tree_order) {
737 exchange_node->log_2 = exchange_node->log_tree_order;
738 }
739
740 tmp=1;
741 for (i=0 ; i < n_levels ; i++ ) {
742 tmp *= tree_order;
743 }
744 /* Ishai: I see no reason for calculating tmp. Add an assert before deleting it */
745 assert(tmp == cnt);
746
747 exchange_node->n_largest_pow_tree_order = tmp;
748 if (2 == tree_order) {
749 exchange_node->n_largest_pow_2 = exchange_node->n_largest_pow_tree_order;
750 }
751
752 /* set node characteristics - node that is not within the largest
753 * power of tree_order will just send it's data to node that will participate
754 * in the recursive doubling, and get the result back at the end.
755 */
756 if ( node_rank + 1 > cnt ) {
757 exchange_node->node_type = EXTRA_NODE;
758 } else {
759 exchange_node->node_type = EXCHANGE_NODE;
760 }
761
762 /* set the initial and final data exchanges - those that are not
763 * part of the recursive doubling.
764 */
765 if ( EXCHANGE_NODE == exchange_node->node_type ) {
766 exchange_node->n_extra_sources = 0;
767 for (tmp = node_rank + cnt; tmp < num_nodes; tmp += cnt) {
768 ++exchange_node->n_extra_sources;
769 }
770 if (exchange_node->n_extra_sources > 0) {
771 exchange_node->rank_extra_sources_array = (int *) malloc
772 (exchange_node->n_extra_sources * sizeof(int));
773 if( NULL == exchange_node->rank_extra_sources_array ) {
774 goto Error;
775 }
776 for (i = 0, tmp = node_rank + cnt; tmp < num_nodes; ++i, tmp += cnt) {
777 NETPATTERNS_VERBOSE(("extra_source#%d = %d", i, tmp));
778 exchange_node->rank_extra_sources_array[i] = tmp;
779 }
780 } else {
781 exchange_node->rank_extra_sources_array = NULL;
782 }
783 } else {
784 exchange_node->n_extra_sources = 1;
785 exchange_node->rank_extra_sources_array = (int *) malloc (sizeof(int));
786 if( NULL == exchange_node->rank_extra_sources_array ) {
787 goto Error;
788 }
789 exchange_node->rank_extra_sources_array[0] = node_rank & (cnt - 1);
790 NETPATTERNS_VERBOSE(("extra_source#%d = %d", 0, node_rank & (cnt - 1)));
791 }
792
793 /* Ishai: To be compatable with the old structure - should be remoived later */
794 if (1 == exchange_node->n_extra_sources) {
795 exchange_node->rank_extra_source = exchange_node->rank_extra_sources_array[0];
796 } else {
797 exchange_node->rank_extra_source = -1;
798 }
799
800 /* set the exchange pattern */
801 if ( EXCHANGE_NODE == exchange_node->node_type ) {
802 exchange_node->n_exchanges = n_levels * (tree_order - 1);
803 exchange_node->rank_exchanges = (int *) malloc
804 (exchange_node->n_exchanges * sizeof(int));
805 if( NULL == exchange_node->rank_exchanges ) {
806 goto Error;
807 }
808
809 /* fill in exchange partners */
810 for ( i = 0, shift = 1 ; i < exchange_node->n_exchanges ; shift *= tree_order ) {
811 for ( mask = 1 ; mask < tree_order ; ++mask, ++i ) {
812 exchange_node->rank_exchanges[i] = node_rank ^ (mask * shift);
813 NETPATTERNS_VERBOSE(("rank_exchanges#%d/%d = %d", i, tree_order, node_rank ^ (mask * shift)));
814 }
815 }
816
817 } else {
818
819 exchange_node->n_exchanges=0;
820 exchange_node->rank_exchanges=NULL;
821
822 }
823
824 /* set the number of tags needed per stripe - this must be the
825 * same across all procs in the communicator.
826 */
827 /* Ishai: Need to find out what is n_tags */
828 exchange_node->n_tags = tree_order * n_levels + 1;
829
830 /* successful return */
831 return OMPI_SUCCESS;
832
833 Error:
834 if (exchange_node->rank_extra_sources_array != NULL) {
835 free(exchange_node->rank_extra_sources_array);
836 }
837
838 /* error return */
839 return OMPI_ERROR;
840 }
841
ompi_netpatterns_cleanup_recursive_doubling_tree_node(netpatterns_pair_exchange_node_t * exchange_node)842 OMPI_DECLSPEC void ompi_netpatterns_cleanup_recursive_doubling_tree_node(
843 netpatterns_pair_exchange_node_t *exchange_node)
844 {
845 NETPATTERNS_VERBOSE(("About to release rank_extra_sources_array and rank_exchanges"));
846 if (exchange_node->rank_extra_sources_array != NULL) {
847 free(exchange_node->rank_extra_sources_array);
848 }
849
850 if (exchange_node->rank_exchanges != NULL) {
851 free(exchange_node->rank_exchanges);
852 }
853 }
854 #endif
855
ompi_netpatterns_setup_recursive_doubling_tree_node(int num_nodes,int node_rank,netpatterns_pair_exchange_node_t * exchange_node)856 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,
857 netpatterns_pair_exchange_node_t *exchange_node)
858 {
859 return ompi_netpatterns_setup_recursive_doubling_n_tree_node(num_nodes, node_rank, 2, exchange_node);
860 }
861
862 #if 0
863 /*OMPI_DECLSPEC int old_netpatterns_setup_recursive_doubling_tree_node(int num_nodes, int node_rank,*/
864 OMPI_DECLSPEC int ompi_netpatterns_setup_recursive_doubling_n_tree_node(int num_nodes, int node_rank,int tree_order,
865 netpatterns_pair_exchange_node_t *exchange_node)
866 {
867 /* local variables */
868 /*int tree_order;*/
869 int i,tmp,cnt,result,n_extra_nodes;
870 int n_exchanges;
871
872 /* figure out number of levels in the tree */
873
874 n_exchanges=0;
875 result=num_nodes;
876 /* tree_order=2;*/
877 /* cnt - number of ranks in given level */
878 cnt=1;
879 while( num_nodes > cnt ) {
880 cnt*=tree_order;
881 n_exchanges++;
882 };
883
884 /* figure out the largest power of 2 that is less than or equal to
885 * num_nodes */
886 if( cnt > num_nodes) {
887 cnt/=tree_order;
888 n_exchanges--;
889 }
890 exchange_node->log_2=n_exchanges;
891
892 tmp=1;
893 for(i=0 ; i < n_exchanges ; i++ ) {
894 tmp*=2;
895 }
896 exchange_node->n_largest_pow_2=tmp;
897
898 /* set node characteristics - node that is not within the largest
899 * power of 2 will just send it's data to node that will participate
900 * in the recursive doubling, and get the result back at the end.
901 */
902 if( node_rank+1 > cnt ) {
903 exchange_node->node_type=EXTRA_NODE;
904 } else {
905 exchange_node->node_type=EXCHANGE_NODE;
906 }
907
908 /* set the initial and final data exchanges - those that are not
909 * part of the recursive doubling.
910 */
911 n_extra_nodes=num_nodes-cnt;
912
913 if ( EXCHANGE_NODE == exchange_node->node_type ) {
914
915 if( node_rank < n_extra_nodes ) {
916 exchange_node->n_extra_sources=1;
917 exchange_node->rank_extra_source=cnt+node_rank;
918 } else {
919 exchange_node->n_extra_sources=0;
920 exchange_node->rank_extra_source=-1;
921 }
922
923 } else {
924 exchange_node->n_extra_sources=1;
925 exchange_node->rank_extra_source=node_rank-cnt;
926 }
927
928 /* set the exchange pattern */
929 if( EXCHANGE_NODE == exchange_node->node_type ) {
930
931 exchange_node->n_exchanges=n_exchanges;
932 exchange_node->rank_exchanges=(int *) malloc
933 (n_exchanges*sizeof(int));
934 if( NULL == exchange_node->rank_exchanges ) {
935 goto Error;
936 }
937
938 /* fill in exchange partners */
939 result=1;
940 tmp=node_rank;
941 for( i=0 ; i < n_exchanges ; i++ ) {
942 if(tmp & 1 ) {
943 exchange_node->rank_exchanges[i]=
944 node_rank-result;
945 } else {
946 exchange_node->rank_exchanges[i]=
947 node_rank+result;
948 }
949 result*=2;
950 tmp/=2;
951 }
952
953 } else {
954
955 exchange_node->n_exchanges=0;
956 exchange_node->rank_exchanges=NULL;
957
958 }
959
960 /* set the number of tags needed per stripe - this must be the
961 * same across all procs in the communicator.
962 */
963 exchange_node->n_tags=2*n_exchanges+1;
964
965 /* Ishai: to make sure free will work also for people that call this function */
966 exchange_node->rank_extra_sources_array = NULL;
967
968 /* successful return */
969 return OMPI_SUCCESS;
970
971 Error:
972
973 /* error return */
974 return OMPI_ERROR;
975 }
976 #endif
977
978