1 #include <hwloc.h>
2 #include <hwloc/helper.h>
3 #include "tm_tree.h"
4 #include "tm_mapping.h"
5 #include <ctype.h>
6 #include "tm_verbose.h"
7 #include "tm_solution.h"
8 
9 
10 tm_topology_t* get_local_topo_with_hwloc(void);
11 tm_topology_t* hwloc_to_tm(char *filename);
12 int int_cmp_inc(const void* x1,const void* x2);
13 void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
14 int symetric(hwloc_topology_t topology);
15 tm_topology_t * tgt_to_tm(char *filename);
16 void tm_display_arity(tm_topology_t *topology);
17 void tm_display_topology(tm_topology_t *topology);
18 void tm_free_topology(tm_topology_t *topology);
19 tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
20 void tm_optimize_topology(tm_topology_t **topology);
21 int  tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology);
22 int topo_nb_proc(hwloc_topology_t topology,int N);
23 void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels);
24 void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints);
25 void topology_cost_cpy(tm_topology_t *topology,double **cost);
26 void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes);
27 double ** topology_to_arch(hwloc_topology_t topology);
28 void   build_synthetic_proc_id(tm_topology_t *topology);
29 tm_topology_t  *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
30 
31 
32 #define LINE_SIZE (1000000)
33 
34 
35 /* transform a tgt scotch file into a topology file*/
tgt_to_tm(char * filename)36 tm_topology_t * tgt_to_tm(char *filename)
37 {
38   tm_topology_t *topology = NULL;
39   FILE *pf = NULL;
40   char line[1024];
41   char *s = NULL;
42   double *cost = NULL;
43   int i;
44 
45 
46 
47   pf = fopen(filename,"r");
48   if(!pf){
49     if(tm_get_verbose_level() >= CRITICAL)
50       fprintf(stderr,"Cannot open %s\n",filename);
51     exit(-1);
52   }
53 
54   if(tm_get_verbose_level() >= INFO)
55     printf("Reading TGT file: %s\n",filename);
56 
57 
58   fgets(line,1024,pf);
59   fclose(pf);
60 
61   s = strstr(line,"tleaf");
62   if(!s){
63     if(tm_get_verbose_level() >= CRITICAL)
64       fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
65     exit(-1);
66   }
67 
68   s += 5;
69   while(isspace(*s))
70     s++;
71 
72   topology                 = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
73   topology->nb_constraints = 0;
74   topology->oversub_fact   = 1;
75   topology->constraints    = NULL;
76   topology->nb_levels      = atoi(strtok(s," "))+1;
77   topology->arity          = (int*)MALLOC(sizeof(int)*topology->nb_levels);
78 
79   cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
80 
81   for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
82     topology->arity[i] = atoi(strtok(NULL," "));
83     cost[i] = atoi(strtok(NULL," "));
84   }
85 
86   topology->arity[topology->nb_levels-1] = 0;
87   /* cost[topology->nb_levels-1]=0; */
88 
89   /*aggregate costs*/
90   for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
91     cost[i] += cost[i+1];
92 
93   build_synthetic_proc_id(topology);
94 
95   if(tm_get_verbose_level() >= INFO)
96     printf("Topology built from %s!\n",filename);
97 
98   topology->cost=cost;
99 
100 
101   return topology;
102 }
103 
topo_nb_proc(hwloc_topology_t topology,int N)104 int topo_nb_proc(hwloc_topology_t topology,int N)
105 {
106   hwloc_obj_t *objs = NULL;
107   int nb_proc;
108 
109   objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);
110   objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
111   nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1);
112   FREE(objs);
113   return nb_proc;
114 }
115 
116 
117 
link_cost(int depth)118 static double link_cost(int depth)
119 {
120   /*
121     Bertha values
122     double tab[5]={21,9,4.5,2.5,0.001};
123     double tab[5]={1,1,1,1,1};
124     double tab[6]={100000,10000,1000,500,100,10};
125   */
126   double tab[11] = {1024,512,256,128,64,32,16,8,4,2,1};
127 
128   return tab[depth];
129   /*
130    return 10*log(depth+2);
131    return (depth+1);
132    return (long int)pow(100,depth);
133   */
134 }
135 
136 
topology_to_arch(hwloc_topology_t topology)137 double ** topology_to_arch(hwloc_topology_t topology)
138 {
139   int nb_proc,i,j;
140   hwloc_obj_t obj_proc1,obj_proc2,obj_res;
141   double **arch = NULL;
142 
143   nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
144   if( nb_proc <= 0 ) {  /* if multiple levels with PUs */
145       return NULL;
146   }
147   arch = (double**)MALLOC(sizeof(double*)*nb_proc);
148   if( NULL == arch ) {
149       return NULL;
150   }
151 
152   for( i = 0 ; i < nb_proc ; i++ ){
153     obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
154     arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
155     for( j = 0 ; j < nb_proc ; j++ ){
156       obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
157       obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
158       /* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
159       arch[obj_proc1->os_index][obj_proc2->os_index]=link_cost(obj_res->depth+1);
160     }
161   }
162   return arch;
163 }
164 
symetric(hwloc_topology_t topology)165 int symetric(hwloc_topology_t topology)
166 {
167    int depth,i,topodepth = hwloc_topology_get_depth(topology);
168    unsigned int arity;
169    hwloc_obj_t obj;
170    for ( depth = 0; depth < topodepth-1 ; depth++ ) {
171     int N = hwloc_get_nbobjs_by_depth(topology, depth);
172     obj = hwloc_get_next_obj_by_depth (topology,depth,NULL);
173     arity = obj->arity;
174 
175     /* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */
176     for (i = 1; i < N; i++ ){
177       obj = hwloc_get_next_obj_by_depth (topology,depth,obj);
178       if( obj->arity != arity){
179 	/* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */
180 	return 0;
181       }
182     }
183    }
184    return 1;
185 }
186 
hwloc_to_tm(char * filename)187 tm_topology_t* hwloc_to_tm(char *filename)
188 {
189   hwloc_topology_t topology;
190   tm_topology_t *res = NULL;
191   hwloc_obj_t *objs = NULL;
192   unsigned topodepth,depth;
193   unsigned int nb_nodes;
194   double *cost;
195   int err, l;
196   unsigned int i;
197   int vl = tm_get_verbose_level();
198 
199   /* Build the topology */
200   hwloc_topology_init(&topology);
201   err = hwloc_topology_set_xml(topology,filename);
202   if(err == -1){
203     if(vl >= CRITICAL)
204       fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
205     exit(-1);
206   }
207 
208 #if HWLOC_API_VERSION >= 0x00020000
209   hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
210 #else  /* HWLOC_API_VERSION >= 0x00020000 */
211   hwloc_topology_ignore_all_keep_structure(topology);
212 #endif  /* HWLOC_API_VERSION >= 0x00020000 */
213   hwloc_topology_load(topology);
214 
215 
216   /* Test if symetric */
217   if(!symetric(topology)){
218     if(tm_get_verbose_level() >= CRITICAL)
219       fprintf(stderr,"%s not symetric!\n",filename);
220     exit(-1);
221   }
222 
223   /* work on depth */
224   topodepth = hwloc_topology_get_depth(topology);
225 
226   res                   = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
227   res->oversub_fact      = 1;
228   res->nb_constraints   = 0;
229   res->constraints      = NULL;
230   res->nb_levels        = topodepth;
231   res->node_id          = (int**)MALLOC(sizeof(int*)*res->nb_levels);
232   res->node_rank        = (int**)MALLOC(sizeof(int*)*res->nb_levels);
233   res->nb_nodes         = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
234   res->arity            = (int*)MALLOC(sizeof(int)*res->nb_levels);
235 
236   if(vl >= INFO)
237       printf("topodepth = %d\n",topodepth);
238 
239   /* Build TreeMatch topology */
240   for( depth = 0 ; depth < topodepth ; depth++ ){
241     nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
242     res->nb_nodes[depth] = nb_nodes;
243     res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
244     res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
245 
246     objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
247     objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
248     hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
249     res->arity[depth] = objs[0]->arity;
250 
251     if (depth == topodepth -1){
252       res->nb_constraints = nb_nodes;
253       res->nb_proc_units  = nb_nodes;
254     }
255 
256     if(vl >= DEBUG)
257       printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]);
258 
259     /* Build process id tab */
260     for (i = 0; i < nb_nodes; i++){
261       if(objs[i]->os_index > nb_nodes){
262 	if(vl >= CRITICAL){
263 	  fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n",
264 		  i, depth, objs[i]->os_index, nb_nodes);
265 	}
266 	exit(-1);
267       }
268 
269       res->node_id[depth][i] = objs[i]->os_index;
270       res->node_rank[depth][objs[i]->os_index] = i;
271       /* if(depth==topodepth-1) */
272     }
273     FREE(objs);
274 
275 
276   }
277 
278   cost = (double*)CALLOC(res->nb_levels,sizeof(double));
279   for(l=0; l<res->nb_levels; l++){
280     cost[l] = link_cost(l);
281   }
282   res->cost = cost;
283 
284 
285   /* Destroy topology object. */
286   hwloc_topology_destroy(topology);
287   if(tm_get_verbose_level() >= INFO)
288     printf("\n");
289 
290 
291 
292   return res;
293 }
294 
get_local_topo_with_hwloc(void)295 tm_topology_t* get_local_topo_with_hwloc(void)
296 {
297   hwloc_topology_t topology;
298   tm_topology_t *res = NULL;
299   hwloc_obj_t *objs = NULL;
300   unsigned topodepth,depth;
301   int nb_nodes,i;
302 
303   /* Build the topology */
304   hwloc_topology_init(&topology);
305 #if HWLOC_API_VERSION >= 0x00020000
306   hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
307 #else  /* HWLOC_API_VERSION >= 0x00020000 */
308   hwloc_topology_ignore_all_keep_structure(topology);
309 #endif  /* HWLOC_API_VERSION >= 0x00020000 */
310   hwloc_topology_load(topology);
311 
312   /* Test if symetric */
313   if(!symetric(topology)){
314     if(tm_get_verbose_level() >= CRITICAL)
315       fprintf(stderr,"Local toplogy not symetric!\n");
316     exit(-1);
317   }
318 
319   /* work on depth */
320   topodepth = hwloc_topology_get_depth(topology);
321 
322   res                  = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
323   res->nb_constraints  = 0;
324   res->constraints     = NULL;
325   res->nb_levels       = topodepth;
326   res->node_id         = (int**)MALLOC(sizeof(int*)*res->nb_levels);
327   res->node_rank       = (int**)MALLOC(sizeof(int*)*res->nb_levels);
328   res->nb_nodes        = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
329   res->arity           = (int*)MALLOC(sizeof(int)*res->nb_levels);
330 
331   /* Build TreeMatch topology */
332   for( depth = 0 ; depth < topodepth ; depth++ ){
333     nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
334     res->nb_nodes[depth] = nb_nodes;
335     res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
336     res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
337 
338     objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
339     objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
340     hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
341     res->arity[depth] = objs[0]->arity;
342 
343     if (depth == topodepth -1){
344       res->nb_constraints = nb_nodes;
345       res->nb_proc_units = nb_nodes;
346     }
347     /* printf("%d:",res->arity[depth]); */
348 
349     /* Build process id tab */
350     for (i = 0; i < nb_nodes; i++){
351       res->node_id[depth][i] = objs[i]->os_index;
352       res->node_rank[depth][objs[i]->os_index] = i;
353       /* if(depth==topodepth-1) */
354     }
355     FREE(objs);
356   }
357 
358 
359 
360   /* Destroy HWLOC topology object. */
361   hwloc_topology_destroy(topology);
362 
363   /* printf("\n"); */
364   return res;
365 }
366 
367 
tm_free_topology(tm_topology_t * topology)368 void tm_free_topology(tm_topology_t *topology)
369 {
370   int i;
371   for( i = 0 ; i < topology->nb_levels ; i++ ){
372     FREE(topology->node_id[i]);
373     FREE(topology->node_rank[i]);
374   }
375 
376   FREE(topology->constraints);
377   FREE(topology->node_id);
378   FREE(topology->node_rank);
379   FREE(topology->nb_nodes);
380   FREE(topology->arity);
381   FREE(topology->cost);
382   FREE(topology);
383 }
384 
tm_load_topology(char * arch_filename,tm_file_type_t arch_file_type)385 tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type){
386   switch(arch_file_type){
387   case   TM_FILE_TYPE_TGT:
388     return  tgt_to_tm(arch_filename);
389   case TM_FILE_TYPE_XML:
390     return hwloc_to_tm(arch_filename);
391   default:
392     if(tm_get_verbose_level() >= ERROR){
393       fprintf(stderr,"Error loading topology. Filetype %d unknown\n", arch_file_type);
394     }
395     exit(-1);
396   }
397 }
398 
399 
tm_display_topology(tm_topology_t * topology)400 void tm_display_topology(tm_topology_t *topology)
401 {
402   int i;
403   unsigned int j;
404   unsigned long  id;
405   for( i = 0 ; i < topology->nb_levels ; i++ ){
406     printf("%d: ",i);
407     for( j = 0 ; j < topology->nb_nodes[i] ; j++)
408       printf("%d ",topology->node_id[i][j]);
409     printf("\n");
410   }
411 
412   printf("Last level: ");
413   for(id = 0; id < topology->nb_nodes[topology->nb_levels-1]/topology->oversub_fact; id++)
414     printf("%d ",topology->node_rank[topology->nb_levels-1][id]);
415   printf("\n");
416 
417 
418   if(topology->constraints){
419     printf("Constraints: ");
420     for(i = 0; i < topology->nb_constraints; i++)
421       printf("%d ",topology->constraints[i]);
422     printf("\n");
423   }
424 
425   printf("\tnb_levels=%d\n\tnb_constraints=%d\n\toversub_fact=%d\n\tnb proc units=%d\n\n",
426 	 topology->nb_levels, topology->nb_constraints, topology->oversub_fact, topology->nb_proc_units);
427 
428 }
429 
430 
tm_display_arity(tm_topology_t * topology)431 void tm_display_arity(tm_topology_t *topology){
432   int depth;
433   for(depth=0; depth < topology->nb_levels; depth++)
434     printf("%d(%lf): ",topology->arity[depth], topology->cost[depth]);
435 
436   printf("\n");
437 }
438 
int_cmp_inc(const void * x1,const void * x2)439 int int_cmp_inc(const void* x1,const void* x2)
440 {
441   return *((int *)x1) < *((int *)x2) ? -1 : 1;
442 }
443 
444 
topo_check_constraints(tm_topology_t * topology)445 static int topo_check_constraints(tm_topology_t *topology){
446   int n = topology->nb_constraints;
447   int i;
448   int depth = topology->nb_levels-1;
449   for (i=0;i<n;i++){
450     if(!in_tab(topology->node_id[depth], topology->nb_nodes[depth], topology->constraints[i])){
451       if(tm_get_verbose_level() >= CRITICAL){
452 	fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",topology->constraints[i]);
453       }
454       return 0;
455     }
456   }
457   return 1;
458 }
459 
460 
461 
462 
463 /* cpy flag tells if we need to copy the array.
464    Set to 1 when called from the application level and 0 when called from inside the library*/
tm_topology_set_binding_constraints_cpy(int * constraints,int nb_constraints,tm_topology_t * topology,int cpy_flag)465 static int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
466 
467   topology -> nb_constraints = nb_constraints;
468   if(cpy_flag){
469     topology -> constraints    =  (int*)MALLOC(nb_constraints*sizeof(int));
470     memcpy(topology -> constraints, constraints, nb_constraints*sizeof(int));
471   }else{
472     topology -> constraints    = constraints;
473   }
474 
475   return topo_check_constraints(topology);
476 }
477 
tm_topology_set_binding_constraints(int * constraints,int nb_constraints,tm_topology_t * topology)478 int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology){
479   return tm_topology_set_binding_constraints_cpy(constraints, nb_constraints, topology, 1);
480 }
481 
tm_topology_add_binding_constraints(char * constraints_filename,tm_topology_t * topology)482 int  tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology)
483 {
484   int *tab = NULL;
485   FILE *pf = NULL;
486   char  line[LINE_SIZE],*l = NULL;
487   char *ptr = NULL;
488   int i,n;
489   unsigned int vl = tm_get_verbose_level();
490 
491 
492   if (!(pf = fopen(constraints_filename,"r"))) {
493     if(vl >= CRITICAL)
494       fprintf(stderr,"Cannot open %s\n",constraints_filename);
495     exit(-1);
496   }
497 
498   /* compute the size of the array to store the constraints*/
499   n = 0;
500   fgets(line, LINE_SIZE, pf);
501   l = line;
502   while((ptr=strtok(l," \t"))){
503     l = NULL;
504     if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr))
505       n++;
506   }
507 
508   tab = (int*)MALLOC(n*sizeof(int));
509 
510   rewind(pf);
511   fgets(line, LINE_SIZE, pf);
512   fclose(pf);
513   l = line;
514   i = 0;
515   while((ptr=strtok(l," \t"))){
516     l = NULL;
517     if((ptr[0] != '\n') && ( !isspace(ptr[0])) && (*ptr) && (ptr)){
518       if(i < n)
519 	tab[i] = atoi(ptr);
520       else{
521 	if(vl >= CRITICAL)
522 	  fprintf(stderr, "More than %d entries in %s\n", n, constraints_filename);
523 	exit(-1);
524       }
525       i++;
526     }
527   }
528 
529   if( i != n ){
530     if(vl >= CRITICAL)
531       fprintf(stderr, "Read %d entries while expecting %d ones\n", i, n);
532     exit(-1);
533   }
534 
535   qsort(tab,n,sizeof(int),int_cmp_inc);
536 
537   return tm_topology_set_binding_constraints_cpy(tab, n, topology, 0);
538 }
539 
540 
topology_numbering_cpy(tm_topology_t * topology,int ** numbering,int * nb_nodes)541 void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes)
542 {
543   int nb_levels;
544   unsigned int vl = tm_get_verbose_level();
545 
546   nb_levels = topology->nb_levels;
547   *nb_nodes = topology->nb_nodes[nb_levels-1];
548   if(vl >= INFO)
549     printf("nb_nodes=%d\n",*nb_nodes);
550   *numbering = (int*)MALLOC(sizeof(int)*(*nb_nodes));
551   memcpy(*numbering,topology->node_id[nb_levels-1],sizeof(int)*(*nb_nodes));
552 }
553 
topology_arity_cpy(tm_topology_t * topology,int ** arity,int * nb_levels)554 void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels)
555 {
556   *nb_levels = topology->nb_levels;
557   *arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
558   memcpy(*arity,topology->arity,sizeof(int)*(*nb_levels));
559 }
560 
topology_constraints_cpy(tm_topology_t * topology,int ** constraints,int * nb_constraints)561 void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints)
562 {
563   *nb_constraints = topology->nb_constraints;
564   if(topology->constraints){
565     *constraints = (int*)MALLOC(sizeof(int)*(*nb_constraints));
566     memcpy(*constraints,topology->constraints,sizeof(int)*(*nb_constraints));
567   }else{
568     *constraints = NULL;
569   }
570 }
571 
topology_cost_cpy(tm_topology_t * topology,double ** cost)572 void topology_cost_cpy(tm_topology_t *topology,double **cost)
573 {
574   *cost = (double*)MALLOC(sizeof(double)*(topology->nb_levels));
575   memcpy(*cost,topology->cost,sizeof(double)*(topology->nb_levels));
576 }
577 
optimize_arity(int ** arity,double ** cost,int * nb_levels,int n)578 void optimize_arity(int **arity, double **cost, int *nb_levels,int n)
579 {
580   int a,i;
581   int *new_arity = NULL;
582   double *new_cost = NULL;
583 
584   if( n < 0 )
585     return;
586   /*   printf("n=%d\tnb_levels=%d\n",n,*nb_levels); */
587   /*   for(i=0;i<*nb_levels;i++) */
588   /*     printf("%d:",(*arity)[i]); */
589   /*   printf("\n");   */
590   /* if(n==(*nb_levels)-3) */
591   /*  exit(-1); */
592   a = (*arity)[n];
593   if( (a%3 == 0) && (a > 3) ){
594     /*
595     check if the arity of level n devides 3
596     If this is the case:
597     Add a level
598     */
599     (*nb_levels)++;
600     /* Build a new arity and cost arrays  */
601     new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
602     new_cost  = (double*)MALLOC(sizeof(double)*(*nb_levels));
603     /*  Copy the begining if the old arrays */
604     for( i = 0 ; i < n ; i++){
605       new_arity[i] = (*arity)[i];
606       new_cost[i] = (*cost)[i];
607     }
608     /* set the nth level to arity 3  */
609     new_arity[n] = 3;
610     /* copy the cost to this level*/
611     new_cost[n] = (*cost)[n];;
612     /* printf("a=%d\n",a); */
613     /* Set the (n+1) level to arity a/3 */
614     new_arity[n+1] = a/3;
615     /*Dupliacte the cost as it is the same level originally*/
616     new_cost[n+1] = (*cost)[n];
617     /* Copy the end of the arrays */
618     for( i = n+2 ; i < *nb_levels ; i++){
619       new_arity[i] = (*arity)[i-1];
620       new_cost[i] = (*cost)[i-1];
621     }
622     FREE(*arity);
623     FREE(*cost);
624     /* if a/3 =3 then go to the next level */
625     if(new_arity[n+1] == 3)
626       optimize_arity(&new_arity,&new_cost,nb_levels,n);
627     else /* continue to this level (remember we just add a new level */
628       optimize_arity(&new_arity,&new_cost,nb_levels,n+1);
629     *arity=new_arity;
630     *cost=new_cost;
631   }else if( (a%2==0) && (a>2) ){/* same as above but for arity == 2 instead of 3 */
632     (*nb_levels)++;
633     new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
634     new_cost  = (double*)MALLOC(sizeof(double)*(*nb_levels));
635     for( i = 0 ; i < n ; i++ ){
636       new_arity[i] = (*arity)[i];
637       new_cost[i] = (*cost)[i];
638     }
639     new_arity[n] = 2;
640     new_cost[n] = (*cost)[n];;
641     /* printf("a=%d\n",a); */
642     new_arity[n+1] = a/2;
643     new_cost[n+1] = (*cost)[n];
644     for( i = n+2 ; i < *nb_levels ; i++ ){
645       new_arity[i] = (*arity)[i-1];
646       new_cost[i] = (*cost)[i-1];
647     }
648    FREE(*arity);
649     FREE(*cost);
650     if(new_arity[n+1] == 2)
651       optimize_arity(&new_arity, &new_cost, nb_levels, n);
652     else
653       optimize_arity(&new_arity, &new_cost, nb_levels, n+1);
654     *arity = new_arity;
655     *cost= new_cost;
656   }else /* if nothing works go to next level.  */
657     optimize_arity(arity, cost, nb_levels,n-1);
658 }
659 
660 
661 
662 
tm_optimize_topology(tm_topology_t ** topology)663 void tm_optimize_topology(tm_topology_t **topology){
664   int *arity = NULL,nb_levels;
665   int *numbering = NULL,nb_nodes;
666   tm_topology_t *new_topo;
667   double *cost;
668   unsigned int vl = tm_get_verbose_level();
669   int *constraints = NULL, nb_constraints;
670   int i;
671 
672   if(vl >= DEBUG)
673     tm_display_arity(*topology);
674 
675   topology_arity_cpy(*topology,&arity,&nb_levels);
676   topology_numbering_cpy(*topology,&numbering,&nb_nodes);
677   topology_constraints_cpy(*topology,&constraints,&nb_constraints);
678   topology_cost_cpy(*topology,&cost);
679 
680 
681   optimize_arity(&arity,&cost,&nb_levels,nb_levels-2);
682   new_topo = tm_build_synthetic_topology(arity, NULL, nb_levels,numbering,nb_nodes);
683   new_topo->cost = cost;
684   new_topo->constraints    = constraints;
685   new_topo->nb_constraints = nb_constraints;
686   new_topo->nb_proc_units  = (*topology)->nb_proc_units;
687   new_topo->oversub_fact   = (*topology)->oversub_fact;
688 
689 
690 
691   if(vl >= DEBUG){
692     if(constraints){
693       printf("Constraints: ");
694       for(i=0;i<nb_constraints;i++)
695 	printf("%d - ",constraints[i]);
696       printf("\n");
697     }
698 
699     tm_display_arity(new_topo);
700   }
701   FREE(arity);
702   FREE(numbering);
703   tm_free_topology(*topology);
704 
705   *topology = new_topo;
706   /*  exit(-1); */
707 
708 
709 }
710 
711 
712 
713 /*
714    Build a synthetic balanced topology
715 
716    arity : array of arity of the first nb_level (of size nb_levels)
717    cost : array of costs between the levels (of size nb_levels)
718    core_numbering: numbering of the core by the system. Array of size nb_core_per_node
719 
720    nb_core_per_nodes: number of cores of a given node size of the array core_numbering
721 
722    The numbering of the cores is done in round robin fashion after a width traversal of the topology.
723    for example:
724        {0,1,2,3} becomes 0,1,2,3,4,5,6,7...
725    and
726        {0,2,1,3} becomes 0,2,1,3,4,6,5,7,...
727  */
728 
tm_build_synthetic_topology(int * arity,double * cost,int nb_levels,int * core_numbering,int nb_core_per_nodes)729 tm_topology_t  *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes)
730 {
731   tm_topology_t *topology = NULL;
732   int i,j,n;
733 
734 
735   topology                 = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
736   topology->nb_constraints = 0;
737   topology->oversub_fact   = 1;
738   topology->constraints    = NULL;
739   topology->nb_levels      = nb_levels;
740   topology->arity          = (int*)MALLOC(sizeof(int)*topology->nb_levels);
741   topology->node_id        = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
742   topology->node_rank      = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
743   topology->nb_nodes       = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels);
744   if(cost)
745     topology->cost         = (double*)CALLOC(topology->nb_levels,sizeof(double));
746   else
747     topology->cost         = NULL;
748 
749   memcpy(topology->arity, arity, sizeof(int)*nb_levels);
750   if(cost)
751     memcpy(topology->cost, cost, sizeof(double)*nb_levels);
752 
753   n = 1;
754   for( i = 0 ; i < topology->nb_levels ; i++ ){
755     topology->nb_nodes[i] = n;
756     topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
757     topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
758     if( i < topology->nb_levels-1){
759       for( j = 0 ; j < n ; j++ ){
760 	topology->node_id[i][j] = j;
761 	topology->node_rank[i][j]=j;
762       }
763     }else{
764       for( j = 0 ; j < n ; j++ ){
765 	int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
766 	topology->node_id[i][j] = id;
767 	topology->node_rank[i][id] = j;
768       }
769     }
770 
771 
772     if (i == topology->nb_levels-1){
773       topology->nb_constraints = n;
774       topology->nb_proc_units = n;
775     }
776 
777     n *= topology->arity[i];
778   }
779   if(cost){
780     /*aggregate costs*/
781     for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
782       topology->cost[i] += topology->cost[i+1];
783   }
784 
785   return topology;
786 }
787 
788 
build_synthetic_proc_id(tm_topology_t * topology)789 void   build_synthetic_proc_id(tm_topology_t *topology)
790 {
791   int i;
792   size_t j,n = 1;
793 
794   topology->node_id   = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
795   topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
796   topology->nb_nodes  = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels);
797 
798   for( i = 0 ; i < topology->nb_levels ; i++ ){
799     /* printf("n= %lld, arity := %d\n",n, topology->arity[i]); */
800     topology->nb_nodes[i] = n;
801     topology->node_id[i] = (int*)MALLOC(sizeof(long int)*n);
802     topology->node_rank[i] = (int*)MALLOC(sizeof(long int)*n);
803     if ( !topology->node_id[i] ){
804       if(tm_get_verbose_level() >= CRITICAL)
805 	fprintf(stderr,"Cannot allocate level %d (of size %ld) of the topology\n", i, (unsigned long int)n);
806       exit(-1);
807     }
808 
809     if (i == topology->nb_levels-1){
810       topology->nb_constraints = n;
811       topology->nb_proc_units = n;
812     }
813 
814 
815 
816     for( j = 0 ; j < n ; j++ ){
817       topology->node_id[i][j] = j;
818       topology->node_rank[i][j] = j;
819     }
820     n *= topology->arity[i];
821   }
822 
823 }
824 
825 
826 
tm_enable_oversubscribing(tm_topology_t * topology,unsigned int oversub_fact)827 void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){
828 {
829   int i,j,n;
830 
831   if(oversub_fact <=1)
832     return;
833 
834   topology -> nb_levels ++;
835   topology -> arity        = (int*)    REALLOC(topology->arity, sizeof(int)*topology->nb_levels);
836   topology -> cost         = (double*) REALLOC(topology->cost, sizeof(double)*topology->nb_levels);
837   topology -> node_id      = (int**)   REALLOC(topology->node_id, sizeof(int*)*topology->nb_levels);
838   topology -> node_rank    = (int**)   REALLOC(topology->node_rank, sizeof(int*)*topology->nb_levels);
839   topology -> nb_nodes     = (size_t *)REALLOC(topology->nb_nodes, sizeof(size_t)*topology->nb_levels);
840   topology -> oversub_fact = oversub_fact;
841 
842   i = topology->nb_levels - 1;
843   n = topology->nb_nodes[i-1] * oversub_fact;
844   topology->arity[i-1] = oversub_fact;
845   topology->cost[i-1] = 0;
846   topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
847   topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
848   topology->nb_nodes[i] = n;
849 
850   for( j = 0 ; j < n ; j++ ){
851     int id = topology->node_id[i-1][j/oversub_fact];
852     topology->node_id[i][j] = id;
853     topology->node_rank[i][id] = j;
854   }
855  }
856 
857 }
858