1 #include <ctype.h>
2 #include <float.h>
3 #include "tm_solution.h"
4 #include "tm_mt.h"
5 #include "tm_mapping.h"
6
7 typedef struct {
8 int val;
9 long key;
10 } hash_t;
11
12
tm_free_solution(tm_solution_t * sol)13 void tm_free_solution(tm_solution_t *sol){
14 int i,n;
15
16 n = sol->k_length;
17
18 if(sol->k)
19 for(i=0 ; i<n ; i++)
20 FREE(sol->k[i]);
21
22 FREE(sol->k);
23 FREE(sol->sigma);
24 FREE(sol);
25 }
26
27 /*
28 Compute the distance in the tree
29 between node i and j : the farther away node i and j, the
30 larger the returned value.
31
32 The algorithm looks at the largest level, starting from the top,
33 for which node i and j are still in the same subtree. This is done
34 by iteratively dividing their numbering by the arity of the levels
35 */
distance(tm_topology_t * topology,int i,int j)36 int distance(tm_topology_t *topology,int i, int j)
37 {
38 int level = 0;
39 int arity;
40 int f_i, f_j ;
41 int vl = tm_get_verbose_level();
42 int depth = topology->nb_levels-1;
43
44 f_i = topology->node_rank[depth][i];
45 f_j = topology->node_rank[depth][j];
46
47 if(vl >= DEBUG)
48 printf("i=%d, j=%d Level = %d f=(%d,%d)\n",i ,j, level, f_i, f_j);
49
50
51 do{
52 level++;
53 arity = topology->arity[level];
54 if( arity == 0 )
55 arity = 1;
56 f_i = f_i/arity;
57 f_j = f_j/arity;
58 } while((f_i!=f_j) && (level < depth));
59
60 if(vl >= DEBUG)
61 printf("distance(%d,%d):%d\n",topology->node_rank[depth][i], topology->node_rank[depth][j], level);
62 /* exit(-1); */
63 return level;
64 }
65
display_sol_sum_com(tm_topology_t * topology,tm_affinity_mat_t * aff_mat,int * sigma)66 double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
67 {
68 double a,c,sol;
69 int i,j;
70 double *cost = topology->cost;
71 double **mat = aff_mat->mat;
72 int N = aff_mat->order;
73 int depth = topology->nb_levels - 1;
74
75
76 sol = 0;
77 for ( i = 0 ; i < N ; i++ )
78 for ( j = i+1 ; j < N ; j++){
79 c = mat[i][j];
80 /*
81 Compute cost in funvtion of the inverse of the distance
82 This is due to the fact that the cost matrix is numbered
83 from top to bottom : cost[0] is the cost of the longest distance.
84 */
85 a = cost[depth-distance(topology,sigma[i],sigma[j])];
86 if(tm_get_verbose_level() >= DEBUG)
87 printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a);
88 sol += c*a;
89 }
90
91 for (i = 0; i < N; i++) {
92 printf("%d", sigma[i]);
93 if(i<N-1)
94 printf(",");
95 }
96 printf(" : %g\n",sol);
97
98 return sol;
99 }
100
101
display_sol_max_com(tm_topology_t * topology,tm_affinity_mat_t * aff_mat,int * sigma)102 static double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
103 {
104 double a,c,sol;
105 int i,j;
106 double *cost = topology->cost;
107 double **mat = aff_mat->mat;
108 int N = aff_mat->order;
109 int vl = tm_get_verbose_level();
110 int depth = topology->nb_levels - 1;
111
112 sol = 0;
113 for ( i = 0 ; i < N ; i++ )
114 for ( j = i+1 ; j < N ; j++){
115 c = mat[i][j];
116 /*
117 Compute cost in funvtion of the inverse of the distance
118 This is due to the fact that the cost matrix is numbered
119 from top to bottom : cost[0] is the cost of the longest distance.
120 */
121 a = cost[depth-distance(topology,sigma[i],sigma[j])];
122 if(vl >= DEBUG)
123 printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a);
124 if(c*a > sol)
125 sol = c*a;
126 }
127
128 for (i = 0; i < N; i++) {
129 printf("%d", sigma[i]);
130 if(i<N-1)
131 printf(",");
132 }
133 printf(" : %g\n",sol);
134
135 return sol;
136 }
137
display_sol_hop_byte(tm_topology_t * topology,tm_affinity_mat_t * aff_mat,int * sigma)138 static double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
139 {
140 double c,sol;
141 int nb_hops;
142 int i,j;
143 double **mat = aff_mat->mat;
144 int N = aff_mat->order;
145
146 sol = 0;
147 for ( i = 0 ; i < N ; i++ )
148 for ( j = i+1 ; j < N ; j++){
149 c = mat[i][j];
150 nb_hops = 2*distance(topology,sigma[i],sigma[j]);
151 if(tm_get_verbose_level() >= DEBUG)
152 printf("T_%d_%d %f*%d=%f\n",i,j,c,nb_hops,c*nb_hops);
153 sol += c*nb_hops;
154 }
155
156 for (i = 0; i < N; i++) {
157 printf("%d", sigma[i]);
158 if(i<N-1)
159 printf(",");
160 }
161 printf(" : %g\n",sol);
162
163 return sol;
164 }
165
166
167
display_sol(tm_topology_t * topology,tm_affinity_mat_t * aff_mat,int * sigma,tm_metric_t metric)168 double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric){
169 switch (metric){
170 case TM_METRIC_SUM_COM:
171 return display_sol_sum_com(topology, aff_mat, sigma);
172 case TM_METRIC_MAX_COM:
173 return display_sol_max_com(topology, aff_mat, sigma);
174 case TM_METRIC_HOP_BYTE:
175 return display_sol_hop_byte(topology, aff_mat, sigma);
176 default:
177 if(tm_get_verbose_level() >= ERROR){
178 fprintf(stderr,"Error printing solution: metric %d not implemented\n",metric);
179 return -1;
180 }
181 }
182 return -1;
183 }
184
tm_display_solution(tm_topology_t * topology,tm_affinity_mat_t * aff_mat,tm_solution_t * sol,tm_metric_t metric)185 double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
186 tm_metric_t metric){
187
188 int i,j;
189 int **k = sol->k;
190
191
192 if(tm_get_verbose_level() >= DEBUG){
193 printf("k: \n");
194 for( i = 0 ; i < nb_processing_units(topology) ; i++ ){
195 if(k[i][0] != -1){
196 printf("\tProcessing unit %d: ",i);
197 for (j = 0 ; j<topology->oversub_fact; j++){
198 if( k[i][j] == -1)
199 break;
200 printf("%d ",k[i][j]);
201 }
202 printf("\n");
203 }
204 }
205 }
206
207
208 return display_sol(topology, aff_mat, sol->sigma, metric);
209 }
210
tm_display_other_heuristics(tm_topology_t * topology,tm_affinity_mat_t * aff_mat,tm_metric_t metric)211 void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric)
212 {
213 int *sigma = NULL;
214 int N = aff_mat->order;
215
216 sigma = (int*)MALLOC(sizeof(int)*N);
217
218 map_Packed(topology, N, sigma);
219 printf("Packed: ");
220 display_sol(topology, aff_mat, sigma, metric);
221
222 map_RR(topology, N, sigma);
223 printf("RR: ");
224 display_sol(topology, aff_mat, sigma, metric);
225
226 /* double duration; */
227 /* CLOCK_T time1,time0; */
228 /* CLOCK(time0); */
229 /* map_MPIPP(topology,1,N,sigma,comm,arch); */
230 /* CLOCK(time1); */
231 /* duration=CLOCK_DIFF(time1,time0); */
232 /* printf("MPIPP-1-D:%f\n",duration); */
233 /* printf("MPIPP-1: "); */
234 /* if (TGT_flag == 1) */
235 /* print_sigma_inv(N,sigma,comm,arch); */
236 /* else */
237 /* print_sigma(N,sigma,comm,arch); */
238
239 /* CLOCK(time0); */
240 /* map_MPIPP(topology,5,N,sigma,comm,arch); */
241 /* CLOCK(time1); */
242 /* duration=CLOCK_DIFF(time1,time0); */
243 /* printf("MPIPP-5-D:%f\n",duration); */
244 /* printf("MPIPP-5: "); */
245 /* if (TGT_flag == 1) */
246 /* print_sigma_inv(N,sigma,comm,arch); */
247 /* else */
248 /* print_sigma(N,sigma,comm,arch); */
249
250 FREE(sigma);
251 }
252
253
in_tab(int * tab,int n,int val)254 int in_tab(int *tab, int n, int val){
255 int i;
256 for( i = 0; i < n ; i++)
257 if(tab[i] == val)
258 return 1;
259
260 return 0;
261 }
262
map_Packed(tm_topology_t * topology,int N,int * sigma)263 void map_Packed(tm_topology_t *topology, int N, int *sigma)
264 {
265 size_t i;
266 int j = 0,depth;
267 int vl = tm_get_verbose_level();
268
269 depth = topology->nb_levels-1;
270
271 for( i = 0 ; i < topology->nb_nodes[depth] ; i++){
272 /* printf ("%d -> %d\n",objs[i]->os_index,i); */
273 if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[depth][i]))){
274 if(vl >= DEBUG)
275 printf ("%lu: %d -> %d\n", i, j, topology->node_id[depth][i]);
276 sigma[j++]=topology->node_id[depth][i];
277 if(j == N)
278 break;
279 }
280 }
281 }
282
map_RR(tm_topology_t * topology,int N,int * sigma)283 void map_RR(tm_topology_t *topology, int N,int *sigma)
284 {
285 int i;
286 int vl = tm_get_verbose_level();
287
288 for( i = 0 ; i < N ; i++ ){
289 if(topology->constraints)
290 sigma[i]=topology->constraints[i%topology->nb_constraints];
291 else
292 sigma[i]=i%topology->nb_proc_units;
293 if(vl >= DEBUG)
294 printf ("%d -> %d (%d)\n",i,sigma[i],topology->nb_proc_units);
295 }
296 }
297
hash_asc(const void * x1,const void * x2)298 int hash_asc(const void* x1,const void* x2)
299 {
300 hash_t *e1 = NULL,*e2 = NULL;
301
302 e1 = ((hash_t*)x1);
303 e2 = ((hash_t*)x2);
304
305 return (e1->key < e2->key) ? -1 : 1;
306 }
307
308
generate_random_sol(tm_topology_t * topology,int N,int level,int seed)309 int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed)
310 {
311 hash_t *hash_tab = NULL;
312 int *sol = NULL;
313 int *nodes_id= NULL;
314 int i;
315
316 nodes_id = topology->node_id[level];
317
318 hash_tab = (hash_t*)MALLOC(sizeof(hash_t)*N);
319 sol = (int*)MALLOC(sizeof(int)*N);
320
321 init_genrand(seed);
322
323 for( i = 0 ; i < N ; i++ ){
324 hash_tab[i].val = nodes_id[i];
325 hash_tab[i].key = genrand_int32();
326 }
327
328 qsort(hash_tab,N,sizeof(hash_t),hash_asc);
329 for( i = 0 ; i < N ; i++ )
330 sol[i] = hash_tab[i].val;
331
332 FREE(hash_tab);
333 return sol;
334 }
335
336
eval_sol(int * sol,int N,double ** comm,double ** arch)337 double eval_sol(int *sol,int N,double **comm, double **arch)
338 {
339 double a,c,res;
340 int i,j;
341
342 res = 0;
343 for ( i = 0 ; i < N ; i++ )
344 for ( j = i+1 ; j < N ; j++ ){
345 c = comm[i][j];
346 a = arch[sol[i]][sol[j]];
347 res += c/a;
348 }
349
350 return res;
351 }
352
exchange(int * sol,int i,int j)353 void exchange(int *sol,int i,int j)
354 {
355 int tmp;
356 tmp = sol[i];
357 sol[i] = sol[j];
358 sol[j] = tmp;
359 }
360
gain_exchange(int * sol,int l,int m,double eval1,int N,double ** comm,double ** arch)361 double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch)
362 {
363 double eval2;
364 if( l == m )
365 return 0;
366 exchange(sol,l,m);
367 eval2 = eval_sol(sol,N,comm,arch);
368 exchange(sol,l,m);
369
370 return eval1-eval2;
371 }
372
select_max(int * l,int * m,double ** gain,int N,int * state)373 void select_max(int *l,int *m,double **gain,int N,int *state)
374 {
375 double max;
376 int i,j;
377
378 max = -DBL_MAX;
379
380 for( i = 0 ; i < N ; i++ )
381 if(!state[i])
382 for( j = 0 ; j < N ; j++ )
383 if( (i != j) && (!state[j]) ){
384 if(gain[i][j] > max){
385 *l = i;
386 *m = j;
387 max=gain[i][j];
388 }
389 }
390 }
391
392
compute_gain(int * sol,int N,double ** gain,double ** comm,double ** arch)393 void compute_gain(int *sol,int N,double **gain,double **comm, double **arch)
394 {
395 double eval1;
396 int i,j;
397
398 eval1 = eval_sol(sol,N,comm,arch);
399 for( i = 0 ; i < N ; i++ )
400 for( j = 0 ; j <= i ; j++)
401 gain[i][j] = gain[j][i] = gain_exchange(sol,i,j,eval1,N,comm,arch);
402 }
403
404
405 /* Randomized Algorithm of
406 Hu Chen, Wenguang Chen, Jian Huang ,Bob Robert,and H.Kuhn. Mpipp: an automatic profile-guided
407 parallel process placement toolset for smp clusters and multiclusters. In
408 Gregory K. Egan and Yoichi Muraoka, editors, ICS, pages 353-360. ACM, 2006.
409 */
410
map_MPIPP(tm_topology_t * topology,int nb_seed,int N,int * sigma,double ** comm,double ** arch)411 void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch)
412 {
413 int *sol = NULL;
414 int *state = NULL;
415 double **gain = NULL;
416 int **history = NULL;
417 double *temp = NULL;
418 int i,j,t,l=0,m=0,seed=0;
419 double max,sum,best_eval,eval;
420
421 gain = (double**)MALLOC(sizeof(double*)*N);
422 history = (int**)MALLOC(sizeof(int*)*N);
423 for( i = 0 ; i < N ; i++){
424 gain[i] = (double*)MALLOC(sizeof(double)*N);
425 history[i] = (int*)MALLOC(sizeof(int)*3);
426 }
427
428 state = (int*)MALLOC(sizeof(int)*N);
429 temp = (double*)MALLOC(sizeof(double)*N);
430
431 sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++);
432 for( i = 0 ; i < N ; i++)
433 sigma[i] = sol[i];
434
435 best_eval = DBL_MAX;
436 while(seed <= nb_seed){
437 do{
438 for( i = 0 ; i < N ; i++ ){
439 state[i] = 0;
440 /* printf("%d ",sol[i]); */
441 }
442 /* printf("\n"); */
443 compute_gain(sol,N,gain,comm,arch);
444 /*
445 display_tab(gain,N);
446 exit(-1);
447 */
448 for( i = 0 ; i < N/2 ; i++ ){
449 select_max(&l,&m,gain,N,state);
450 /* printf("%d: %d <=> %d : %f\n",i,l,m,gain[l][m]); */
451 state[l] = 1;
452 state[m] = 1;
453 exchange(sol,l,m);
454 history[i][1] = l;
455 history[i][2] = m;
456 temp[i] = gain[l][m];
457 compute_gain(sol,N,gain,comm,arch);
458 }
459
460 t = -1;
461 max = 0;
462 sum = 0;
463 for(i = 0 ; i < N/2 ; i++ ){
464 sum += temp[i];
465 if( sum > max ){
466 max = sum;
467 t = i;
468 }
469 }
470 /*for(j=0;j<=t;j++)
471 printf("exchanging: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */
472 for( j = t+1 ; j < N/2 ; j++ ){
473 exchange(sol,history[j][1],history[j][2]);
474 /* printf("Undoing: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */
475 }
476 /* printf("max=%f\n",max); */
477
478 /*for(i=0;i<N;i++){
479 printf("%d ",sol[i]);
480 }
481 printf("\n");*/
482 eval = eval_sol(sol,N,comm,arch);
483 if(eval < best_eval){
484 best_eval = eval;
485 for(i = 0 ; i < N ; i++)
486 sigma[i] = sol[i];
487 /* print_sol(N); */
488 }
489 }while( max > 0 );
490 FREE(sol);
491 sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
492 }
493
494
495 FREE(sol);
496 FREE(temp);
497 FREE(state);
498 for( i = 0 ; i < N ; i++){
499 FREE(gain[i]);
500 FREE(history[i]);
501 }
502 FREE(gain);
503 FREE(history);
504 }
505