1 #include <hwloc.h>
2 #include <hwloc/helper.h>
3 #include "tm_tree.h"
4 #include "tm_mapping.h"
5 #include <ctype.h>
6 #include "tm_verbose.h"
7 #include "tm_solution.h"
8
9
10 tm_topology_t* get_local_topo_with_hwloc(void);
11 tm_topology_t* hwloc_to_tm(char *filename);
12 int int_cmp_inc(const void* x1,const void* x2);
13 void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
14 int symetric(hwloc_topology_t topology);
15 tm_topology_t * tgt_to_tm(char *filename);
16 void tm_display_arity(tm_topology_t *topology);
17 void tm_display_topology(tm_topology_t *topology);
18 void tm_free_topology(tm_topology_t *topology);
19 tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
20 void tm_optimize_topology(tm_topology_t **topology);
21 int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology);
22 int topo_nb_proc(hwloc_topology_t topology,int N);
23 void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels);
24 void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints);
25 void topology_cost_cpy(tm_topology_t *topology,double **cost);
26 void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes);
27 double ** topology_to_arch(hwloc_topology_t topology);
28 void build_synthetic_proc_id(tm_topology_t *topology);
29 tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
30
31
32 #define LINE_SIZE (1000000)
33
34
35 /* transform a tgt scotch file into a topology file*/
tgt_to_tm(char * filename)36 tm_topology_t * tgt_to_tm(char *filename)
37 {
38 tm_topology_t *topology = NULL;
39 FILE *pf = NULL;
40 char line[1024];
41 char *s = NULL;
42 double *cost = NULL;
43 int i;
44
45
46
47 pf = fopen(filename,"r");
48 if(!pf){
49 if(tm_get_verbose_level() >= CRITICAL)
50 fprintf(stderr,"Cannot open %s\n",filename);
51 exit(-1);
52 }
53
54 if(tm_get_verbose_level() >= INFO)
55 printf("Reading TGT file: %s\n",filename);
56
57
58 fgets(line,1024,pf);
59 fclose(pf);
60
61 s = strstr(line,"tleaf");
62 if(!s){
63 if(tm_get_verbose_level() >= CRITICAL)
64 fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
65 exit(-1);
66 }
67
68 s += 5;
69 while(isspace(*s))
70 s++;
71
72 topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
73 topology->nb_constraints = 0;
74 topology->oversub_fact = 1;
75 topology->constraints = NULL;
76 topology->nb_levels = atoi(strtok(s," "))+1;
77 topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
78
79 cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
80
81 for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
82 topology->arity[i] = atoi(strtok(NULL," "));
83 cost[i] = atoi(strtok(NULL," "));
84 }
85
86 topology->arity[topology->nb_levels-1] = 0;
87 /* cost[topology->nb_levels-1]=0; */
88
89 /*aggregate costs*/
90 for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
91 cost[i] += cost[i+1];
92
93 build_synthetic_proc_id(topology);
94
95 if(tm_get_verbose_level() >= INFO)
96 printf("Topology built from %s!\n",filename);
97
98 topology->cost=cost;
99
100
101 return topology;
102 }
103
topo_nb_proc(hwloc_topology_t topology,int N)104 int topo_nb_proc(hwloc_topology_t topology,int N)
105 {
106 hwloc_obj_t *objs = NULL;
107 int nb_proc;
108
109 objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);
110 objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
111 nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1);
112 FREE(objs);
113 return nb_proc;
114 }
115
116
117
link_cost(int depth)118 static double link_cost(int depth)
119 {
120 /*
121 Bertha values
122 double tab[5]={21,9,4.5,2.5,0.001};
123 double tab[5]={1,1,1,1,1};
124 double tab[6]={100000,10000,1000,500,100,10};
125 */
126 double tab[11] = {1024,512,256,128,64,32,16,8,4,2,1};
127
128 return tab[depth];
129 /*
130 return 10*log(depth+2);
131 return (depth+1);
132 return (long int)pow(100,depth);
133 */
134 }
135
136
topology_to_arch(hwloc_topology_t topology)137 double ** topology_to_arch(hwloc_topology_t topology)
138 {
139 int nb_proc,i,j;
140 hwloc_obj_t obj_proc1,obj_proc2,obj_res;
141 double **arch = NULL;
142
143 nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
144 if( nb_proc <= 0 ) { /* if multiple levels with PUs */
145 return NULL;
146 }
147 arch = (double**)MALLOC(sizeof(double*)*nb_proc);
148 if( NULL == arch ) {
149 return NULL;
150 }
151
152 for( i = 0 ; i < nb_proc ; i++ ){
153 obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
154 arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
155 for( j = 0 ; j < nb_proc ; j++ ){
156 obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
157 obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
158 /* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
159 arch[obj_proc1->os_index][obj_proc2->os_index]=link_cost(obj_res->depth+1);
160 }
161 }
162 return arch;
163 }
164
symetric(hwloc_topology_t topology)165 int symetric(hwloc_topology_t topology)
166 {
167 int depth,i,topodepth = hwloc_topology_get_depth(topology);
168 unsigned int arity;
169 hwloc_obj_t obj;
170 for ( depth = 0; depth < topodepth-1 ; depth++ ) {
171 int N = hwloc_get_nbobjs_by_depth(topology, depth);
172 obj = hwloc_get_next_obj_by_depth (topology,depth,NULL);
173 arity = obj->arity;
174
175 /* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */
176 for (i = 1; i < N; i++ ){
177 obj = hwloc_get_next_obj_by_depth (topology,depth,obj);
178 if( obj->arity != arity){
179 /* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */
180 return 0;
181 }
182 }
183 }
184 return 1;
185 }
186
hwloc_to_tm(char * filename)187 tm_topology_t* hwloc_to_tm(char *filename)
188 {
189 hwloc_topology_t topology;
190 tm_topology_t *res = NULL;
191 hwloc_obj_t *objs = NULL;
192 unsigned topodepth,depth;
193 unsigned int nb_nodes;
194 double *cost;
195 int err, l;
196 unsigned int i;
197 int vl = tm_get_verbose_level();
198
199 /* Build the topology */
200 hwloc_topology_init(&topology);
201 err = hwloc_topology_set_xml(topology,filename);
202 if(err == -1){
203 if(vl >= CRITICAL)
204 fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
205 exit(-1);
206 }
207
208 #if HWLOC_API_VERSION >= 0x00020000
209 hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
210 #else /* HWLOC_API_VERSION >= 0x00020000 */
211 hwloc_topology_ignore_all_keep_structure(topology);
212 #endif /* HWLOC_API_VERSION >= 0x00020000 */
213 hwloc_topology_load(topology);
214
215
216 /* Test if symetric */
217 if(!symetric(topology)){
218 if(tm_get_verbose_level() >= CRITICAL)
219 fprintf(stderr,"%s not symetric!\n",filename);
220 exit(-1);
221 }
222
223 /* work on depth */
224 topodepth = hwloc_topology_get_depth(topology);
225
226 res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
227 res->oversub_fact = 1;
228 res->nb_constraints = 0;
229 res->constraints = NULL;
230 res->nb_levels = topodepth;
231 res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
232 res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
233 res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
234 res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
235
236 if(vl >= INFO)
237 printf("topodepth = %d\n",topodepth);
238
239 /* Build TreeMatch topology */
240 for( depth = 0 ; depth < topodepth ; depth++ ){
241 nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
242 res->nb_nodes[depth] = nb_nodes;
243 res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
244 res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
245
246 objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
247 objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
248 hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
249 res->arity[depth] = objs[0]->arity;
250
251 if (depth == topodepth -1){
252 res->nb_constraints = nb_nodes;
253 res->nb_proc_units = nb_nodes;
254 }
255
256 if(vl >= DEBUG)
257 printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]);
258
259 /* Build process id tab */
260 for (i = 0; i < nb_nodes; i++){
261 if(objs[i]->os_index > nb_nodes){
262 if(vl >= CRITICAL){
263 fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n",
264 i, depth, objs[i]->os_index, nb_nodes);
265 }
266 exit(-1);
267 }
268
269 res->node_id[depth][i] = objs[i]->os_index;
270 res->node_rank[depth][objs[i]->os_index] = i;
271 /* if(depth==topodepth-1) */
272 }
273 FREE(objs);
274
275
276 }
277
278 cost = (double*)CALLOC(res->nb_levels,sizeof(double));
279 for(l=0; l<res->nb_levels; l++){
280 cost[l] = link_cost(l);
281 }
282 res->cost = cost;
283
284
285 /* Destroy topology object. */
286 hwloc_topology_destroy(topology);
287 if(tm_get_verbose_level() >= INFO)
288 printf("\n");
289
290
291
292 return res;
293 }
294
get_local_topo_with_hwloc(void)295 tm_topology_t* get_local_topo_with_hwloc(void)
296 {
297 hwloc_topology_t topology;
298 tm_topology_t *res = NULL;
299 hwloc_obj_t *objs = NULL;
300 unsigned topodepth,depth;
301 int nb_nodes,i;
302
303 /* Build the topology */
304 hwloc_topology_init(&topology);
305 #if HWLOC_API_VERSION >= 0x00020000
306 hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
307 #else /* HWLOC_API_VERSION >= 0x00020000 */
308 hwloc_topology_ignore_all_keep_structure(topology);
309 #endif /* HWLOC_API_VERSION >= 0x00020000 */
310 hwloc_topology_load(topology);
311
312 /* Test if symetric */
313 if(!symetric(topology)){
314 if(tm_get_verbose_level() >= CRITICAL)
315 fprintf(stderr,"Local toplogy not symetric!\n");
316 exit(-1);
317 }
318
319 /* work on depth */
320 topodepth = hwloc_topology_get_depth(topology);
321
322 res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
323 res->nb_constraints = 0;
324 res->constraints = NULL;
325 res->nb_levels = topodepth;
326 res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
327 res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
328 res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
329 res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
330
331 /* Build TreeMatch topology */
332 for( depth = 0 ; depth < topodepth ; depth++ ){
333 nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
334 res->nb_nodes[depth] = nb_nodes;
335 res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
336 res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
337
338 objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
339 objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
340 hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
341 res->arity[depth] = objs[0]->arity;
342
343 if (depth == topodepth -1){
344 res->nb_constraints = nb_nodes;
345 res->nb_proc_units = nb_nodes;
346 }
347 /* printf("%d:",res->arity[depth]); */
348
349 /* Build process id tab */
350 for (i = 0; i < nb_nodes; i++){
351 res->node_id[depth][i] = objs[i]->os_index;
352 res->node_rank[depth][objs[i]->os_index] = i;
353 /* if(depth==topodepth-1) */
354 }
355 FREE(objs);
356 }
357
358
359
360 /* Destroy HWLOC topology object. */
361 hwloc_topology_destroy(topology);
362
363 /* printf("\n"); */
364 return res;
365 }
366
367
tm_free_topology(tm_topology_t * topology)368 void tm_free_topology(tm_topology_t *topology)
369 {
370 int i;
371 for( i = 0 ; i < topology->nb_levels ; i++ ){
372 FREE(topology->node_id[i]);
373 FREE(topology->node_rank[i]);
374 }
375
376 FREE(topology->constraints);
377 FREE(topology->node_id);
378 FREE(topology->node_rank);
379 FREE(topology->nb_nodes);
380 FREE(topology->arity);
381 FREE(topology->cost);
382 FREE(topology);
383 }
384
tm_load_topology(char * arch_filename,tm_file_type_t arch_file_type)385 tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type){
386 switch(arch_file_type){
387 case TM_FILE_TYPE_TGT:
388 return tgt_to_tm(arch_filename);
389 case TM_FILE_TYPE_XML:
390 return hwloc_to_tm(arch_filename);
391 default:
392 if(tm_get_verbose_level() >= ERROR){
393 fprintf(stderr,"Error loading topology. Filetype %d unknown\n", arch_file_type);
394 }
395 exit(-1);
396 }
397 }
398
399
tm_display_topology(tm_topology_t * topology)400 void tm_display_topology(tm_topology_t *topology)
401 {
402 int i;
403 unsigned int j;
404 unsigned long id;
405 for( i = 0 ; i < topology->nb_levels ; i++ ){
406 printf("%d: ",i);
407 for( j = 0 ; j < topology->nb_nodes[i] ; j++)
408 printf("%d ",topology->node_id[i][j]);
409 printf("\n");
410 }
411
412 printf("Last level: ");
413 for(id = 0; id < topology->nb_nodes[topology->nb_levels-1]/topology->oversub_fact; id++)
414 printf("%d ",topology->node_rank[topology->nb_levels-1][id]);
415 printf("\n");
416
417
418 if(topology->constraints){
419 printf("Constraints: ");
420 for(i = 0; i < topology->nb_constraints; i++)
421 printf("%d ",topology->constraints[i]);
422 printf("\n");
423 }
424
425 printf("\tnb_levels=%d\n\tnb_constraints=%d\n\toversub_fact=%d\n\tnb proc units=%d\n\n",
426 topology->nb_levels, topology->nb_constraints, topology->oversub_fact, topology->nb_proc_units);
427
428 }
429
430
tm_display_arity(tm_topology_t * topology)431 void tm_display_arity(tm_topology_t *topology){
432 int depth;
433 for(depth=0; depth < topology->nb_levels; depth++)
434 printf("%d(%lf): ",topology->arity[depth], topology->cost[depth]);
435
436 printf("\n");
437 }
438
int_cmp_inc(const void * x1,const void * x2)439 int int_cmp_inc(const void* x1,const void* x2)
440 {
441 return *((int *)x1) < *((int *)x2) ? -1 : 1;
442 }
443
444
topo_check_constraints(tm_topology_t * topology)445 static int topo_check_constraints(tm_topology_t *topology){
446 int n = topology->nb_constraints;
447 int i;
448 int depth = topology->nb_levels-1;
449 for (i=0;i<n;i++){
450 if(!in_tab(topology->node_id[depth], topology->nb_nodes[depth], topology->constraints[i])){
451 if(tm_get_verbose_level() >= CRITICAL){
452 fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",topology->constraints[i]);
453 }
454 return 0;
455 }
456 }
457 return 1;
458 }
459
460
461
462
463 /* cpy flag tells if we need to copy the array.
464 Set to 1 when called from the application level and 0 when called from inside the library*/
tm_topology_set_binding_constraints_cpy(int * constraints,int nb_constraints,tm_topology_t * topology,int cpy_flag)465 static int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
466
467 topology -> nb_constraints = nb_constraints;
468 if(cpy_flag){
469 topology -> constraints = (int*)MALLOC(nb_constraints*sizeof(int));
470 memcpy(topology -> constraints, constraints, nb_constraints*sizeof(int));
471 }else{
472 topology -> constraints = constraints;
473 }
474
475 return topo_check_constraints(topology);
476 }
477
tm_topology_set_binding_constraints(int * constraints,int nb_constraints,tm_topology_t * topology)478 int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology){
479 return tm_topology_set_binding_constraints_cpy(constraints, nb_constraints, topology, 1);
480 }
481
tm_topology_add_binding_constraints(char * constraints_filename,tm_topology_t * topology)482 int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology)
483 {
484 int *tab = NULL;
485 FILE *pf = NULL;
486 char line[LINE_SIZE],*l = NULL;
487 char *ptr = NULL;
488 int i,n;
489 unsigned int vl = tm_get_verbose_level();
490
491
492 if (!(pf = fopen(constraints_filename,"r"))) {
493 if(vl >= CRITICAL)
494 fprintf(stderr,"Cannot open %s\n",constraints_filename);
495 exit(-1);
496 }
497
498 /* compute the size of the array to store the constraints*/
499 n = 0;
500 fgets(line, LINE_SIZE, pf);
501 l = line;
502 while((ptr=strtok(l," \t"))){
503 l = NULL;
504 if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr))
505 n++;
506 }
507
508 tab = (int*)MALLOC(n*sizeof(int));
509
510 rewind(pf);
511 fgets(line, LINE_SIZE, pf);
512 fclose(pf);
513 l = line;
514 i = 0;
515 while((ptr=strtok(l," \t"))){
516 l = NULL;
517 if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr)){
518 if(i < n)
519 tab[i] = atoi(ptr);
520 else{
521 if(vl >= CRITICAL)
522 fprintf(stderr, "More than %d entries in %s\n", n, constraints_filename);
523 exit(-1);
524 }
525 i++;
526 }
527 }
528
529 if( i != n ){
530 if(vl >= CRITICAL)
531 fprintf(stderr, "Read %d entries while expecting %d ones\n", i, n);
532 exit(-1);
533 }
534
535 qsort(tab,n,sizeof(int),int_cmp_inc);
536
537 return tm_topology_set_binding_constraints_cpy(tab, n, topology, 0);
538 }
539
540
topology_numbering_cpy(tm_topology_t * topology,int ** numbering,int * nb_nodes)541 void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes)
542 {
543 int nb_levels;
544 unsigned int vl = tm_get_verbose_level();
545
546 nb_levels = topology->nb_levels;
547 *nb_nodes = topology->nb_nodes[nb_levels-1];
548 if(vl >= INFO)
549 printf("nb_nodes=%d\n",*nb_nodes);
550 *numbering = (int*)MALLOC(sizeof(int)*(*nb_nodes));
551 memcpy(*numbering,topology->node_id[nb_levels-1],sizeof(int)*(*nb_nodes));
552 }
553
topology_arity_cpy(tm_topology_t * topology,int ** arity,int * nb_levels)554 void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels)
555 {
556 *nb_levels = topology->nb_levels;
557 *arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
558 memcpy(*arity,topology->arity,sizeof(int)*(*nb_levels));
559 }
560
topology_constraints_cpy(tm_topology_t * topology,int ** constraints,int * nb_constraints)561 void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints)
562 {
563 *nb_constraints = topology->nb_constraints;
564 if(topology->constraints){
565 *constraints = (int*)MALLOC(sizeof(int)*(*nb_constraints));
566 memcpy(*constraints,topology->constraints,sizeof(int)*(*nb_constraints));
567 }else{
568 *constraints = NULL;
569 }
570 }
571
topology_cost_cpy(tm_topology_t * topology,double ** cost)572 void topology_cost_cpy(tm_topology_t *topology,double **cost)
573 {
574 *cost = (double*)MALLOC(sizeof(double)*(topology->nb_levels));
575 memcpy(*cost,topology->cost,sizeof(double)*(topology->nb_levels));
576 }
577
optimize_arity(int ** arity,double ** cost,int * nb_levels,int n)578 void optimize_arity(int **arity, double **cost, int *nb_levels,int n)
579 {
580 int a,i;
581 int *new_arity = NULL;
582 double *new_cost = NULL;
583
584 if( n < 0 )
585 return;
586 /* printf("n=%d\tnb_levels=%d\n",n,*nb_levels); */
587 /* for(i=0;i<*nb_levels;i++) */
588 /* printf("%d:",(*arity)[i]); */
589 /* printf("\n"); */
590 /* if(n==(*nb_levels)-3) */
591 /* exit(-1); */
592 a = (*arity)[n];
593 if( (a%3 == 0) && (a > 3) ){
594 /*
595 check if the arity of level n devides 3
596 If this is the case:
597 Add a level
598 */
599 (*nb_levels)++;
600 /* Build a new arity and cost arrays */
601 new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
602 new_cost = (double*)MALLOC(sizeof(double)*(*nb_levels));
603 /* Copy the begining if the old arrays */
604 for( i = 0 ; i < n ; i++){
605 new_arity[i] = (*arity)[i];
606 new_cost[i] = (*cost)[i];
607 }
608 /* set the nth level to arity 3 */
609 new_arity[n] = 3;
610 /* copy the cost to this level*/
611 new_cost[n] = (*cost)[n];;
612 /* printf("a=%d\n",a); */
613 /* Set the (n+1) level to arity a/3 */
614 new_arity[n+1] = a/3;
615 /*Dupliacte the cost as it is the same level originally*/
616 new_cost[n+1] = (*cost)[n];
617 /* Copy the end of the arrays */
618 for( i = n+2 ; i < *nb_levels ; i++){
619 new_arity[i] = (*arity)[i-1];
620 new_cost[i] = (*cost)[i-1];
621 }
622 FREE(*arity);
623 FREE(*cost);
624 /* if a/3 =3 then go to the next level */
625 if(new_arity[n+1] == 3)
626 optimize_arity(&new_arity,&new_cost,nb_levels,n);
627 else /* continue to this level (remember we just add a new level */
628 optimize_arity(&new_arity,&new_cost,nb_levels,n+1);
629 *arity=new_arity;
630 *cost=new_cost;
631 }else if( (a%2==0) && (a>2) ){/* same as above but for arity == 2 instead of 3 */
632 (*nb_levels)++;
633 new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
634 new_cost = (double*)MALLOC(sizeof(double)*(*nb_levels));
635 for( i = 0 ; i < n ; i++ ){
636 new_arity[i] = (*arity)[i];
637 new_cost[i] = (*cost)[i];
638 }
639 new_arity[n] = 2;
640 new_cost[n] = (*cost)[n];;
641 /* printf("a=%d\n",a); */
642 new_arity[n+1] = a/2;
643 new_cost[n+1] = (*cost)[n];
644 for( i = n+2 ; i < *nb_levels ; i++ ){
645 new_arity[i] = (*arity)[i-1];
646 new_cost[i] = (*cost)[i-1];
647 }
648 FREE(*arity);
649 FREE(*cost);
650 if(new_arity[n+1] == 2)
651 optimize_arity(&new_arity, &new_cost, nb_levels, n);
652 else
653 optimize_arity(&new_arity, &new_cost, nb_levels, n+1);
654 *arity = new_arity;
655 *cost= new_cost;
656 }else /* if nothing works go to next level. */
657 optimize_arity(arity, cost, nb_levels,n-1);
658 }
659
660
661
662
tm_optimize_topology(tm_topology_t ** topology)663 void tm_optimize_topology(tm_topology_t **topology){
664 int *arity = NULL,nb_levels;
665 int *numbering = NULL,nb_nodes;
666 tm_topology_t *new_topo;
667 double *cost;
668 unsigned int vl = tm_get_verbose_level();
669 int *constraints = NULL, nb_constraints;
670 int i;
671
672 if(vl >= DEBUG)
673 tm_display_arity(*topology);
674
675 topology_arity_cpy(*topology,&arity,&nb_levels);
676 topology_numbering_cpy(*topology,&numbering,&nb_nodes);
677 topology_constraints_cpy(*topology,&constraints,&nb_constraints);
678 topology_cost_cpy(*topology,&cost);
679
680
681 optimize_arity(&arity,&cost,&nb_levels,nb_levels-2);
682 new_topo = tm_build_synthetic_topology(arity, NULL, nb_levels,numbering,nb_nodes);
683 new_topo->cost = cost;
684 new_topo->constraints = constraints;
685 new_topo->nb_constraints = nb_constraints;
686 new_topo->nb_proc_units = (*topology)->nb_proc_units;
687 new_topo->oversub_fact = (*topology)->oversub_fact;
688
689
690
691 if(vl >= DEBUG){
692 if(constraints){
693 printf("Constraints: ");
694 for(i=0;i<nb_constraints;i++)
695 printf("%d - ",constraints[i]);
696 printf("\n");
697 }
698
699 tm_display_arity(new_topo);
700 }
701 FREE(arity);
702 FREE(numbering);
703 tm_free_topology(*topology);
704
705 *topology = new_topo;
706 /* exit(-1); */
707
708
709 }
710
711
712
713 /*
714 Build a synthetic balanced topology
715
716 arity : array of arity of the first nb_level (of size nb_levels)
717 cost : array of costs between the levels (of size nb_levels)
718 core_numbering: numbering of the core by the system. Array of size nb_core_per_node
719
720 nb_core_per_nodes: number of cores of a given node size of the array core_numbering
721
722 The numbering of the cores is done in round robin fashion after a width traversal of the topology.
723 for example:
724 {0,1,2,3} becomes 0,1,2,3,4,5,6,7...
725 and
726 {0,2,1,3} becomes 0,2,1,3,4,6,5,7,...
727 */
728
tm_build_synthetic_topology(int * arity,double * cost,int nb_levels,int * core_numbering,int nb_core_per_nodes)729 tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes)
730 {
731 tm_topology_t *topology = NULL;
732 int i,j,n;
733
734
735 topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
736 topology->nb_constraints = 0;
737 topology->oversub_fact = 1;
738 topology->constraints = NULL;
739 topology->nb_levels = nb_levels;
740 topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
741 topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
742 topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
743 topology->nb_nodes = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels);
744 if(cost)
745 topology->cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
746 else
747 topology->cost = NULL;
748
749 memcpy(topology->arity, arity, sizeof(int)*nb_levels);
750 if(cost)
751 memcpy(topology->cost, cost, sizeof(double)*nb_levels);
752
753 n = 1;
754 for( i = 0 ; i < topology->nb_levels ; i++ ){
755 topology->nb_nodes[i] = n;
756 topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
757 topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
758 if( i < topology->nb_levels-1){
759 for( j = 0 ; j < n ; j++ ){
760 topology->node_id[i][j] = j;
761 topology->node_rank[i][j]=j;
762 }
763 }else{
764 for( j = 0 ; j < n ; j++ ){
765 int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
766 topology->node_id[i][j] = id;
767 topology->node_rank[i][id] = j;
768 }
769 }
770
771
772 if (i == topology->nb_levels-1){
773 topology->nb_constraints = n;
774 topology->nb_proc_units = n;
775 }
776
777 n *= topology->arity[i];
778 }
779 if(cost){
780 /*aggregate costs*/
781 for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
782 topology->cost[i] += topology->cost[i+1];
783 }
784
785 return topology;
786 }
787
788
build_synthetic_proc_id(tm_topology_t * topology)789 void build_synthetic_proc_id(tm_topology_t *topology)
790 {
791 int i;
792 size_t j,n = 1;
793
794 topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
795 topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
796 topology->nb_nodes = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels);
797
798 for( i = 0 ; i < topology->nb_levels ; i++ ){
799 /* printf("n= %lld, arity := %d\n",n, topology->arity[i]); */
800 topology->nb_nodes[i] = n;
801 topology->node_id[i] = (int*)MALLOC(sizeof(long int)*n);
802 topology->node_rank[i] = (int*)MALLOC(sizeof(long int)*n);
803 if ( !topology->node_id[i] ){
804 if(tm_get_verbose_level() >= CRITICAL)
805 fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n);
806 exit(-1);
807 }
808
809 if (i == topology->nb_levels-1){
810 topology->nb_constraints = n;
811 topology->nb_proc_units = n;
812 }
813
814
815
816 for( j = 0 ; j < n ; j++ ){
817 topology->node_id[i][j] = j;
818 topology->node_rank[i][j] = j;
819 }
820 n *= topology->arity[i];
821 }
822
823 }
824
825
826
tm_enable_oversubscribing(tm_topology_t * topology,unsigned int oversub_fact)827 void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){
828 {
829 int i,j,n;
830
831 if(oversub_fact <=1)
832 return;
833
834 topology -> nb_levels ++;
835 topology -> arity = (int*) REALLOC(topology->arity, sizeof(int)*topology->nb_levels);
836 topology -> cost = (double*) REALLOC(topology->cost, sizeof(double)*topology->nb_levels);
837 topology -> node_id = (int**) REALLOC(topology->node_id, sizeof(int*)*topology->nb_levels);
838 topology -> node_rank = (int**) REALLOC(topology->node_rank, sizeof(int*)*topology->nb_levels);
839 topology -> nb_nodes = (size_t *)REALLOC(topology->nb_nodes, sizeof(size_t)*topology->nb_levels);
840 topology -> oversub_fact = oversub_fact;
841
842 i = topology->nb_levels - 1;
843 n = topology->nb_nodes[i-1] * oversub_fact;
844 topology->arity[i-1] = oversub_fact;
845 topology->cost[i-1] = 0;
846 topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
847 topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
848 topology->nb_nodes[i] = n;
849
850 for( j = 0 ; j < n ; j++ ){
851 int id = topology->node_id[i-1][j/oversub_fact];
852 topology->node_id[i][j] = id;
853 topology->node_rank[i][id] = j;
854 }
855 }
856
857 }
858