1 /*
2  * Copyright © 2016-2017 Inria.  All rights reserved.
3  *
4  * $COPYRIGHT$
5  *
6  * Additional copyrights may follow
7  * See COPYING in top-level directory.
8  *
9  * $HEADER$
10  */
11 
12 #define _GNU_SOURCE         /* See feature_test_macros(7) */
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <sys/types.h>
16 #include <dirent.h>
17 #include <scotch.h>
18 
19 #include <netloc.h>
20 #include <netlocscotch.h>
21 #include <private/netloc.h>
22 #include <hwloc.h>
23 
24 static int arch_tree_to_scotch_arch(netloc_arch_tree_t *tree, SCOTCH_Arch *scotch);
25 static int comm_matrix_to_scotch_graph(double **matrix, int n, SCOTCH_Graph *graph);
26 static int netlocscotch_get_mapping_from_graph(SCOTCH_Graph *graph,
27         netlocscotch_core_t **pcores);
28 
compareint(void const * a,void const * b)29 static int compareint(void const *a, void const *b)
30 {
31    const int *int_a = (const int *)a;
32    const int *int_b = (const int *)b;
33    return *int_a-*int_b;
34 }
35 
build_subarch(SCOTCH_Arch * scotch,NETLOC_int num_nodes,NETLOC_int * node_list,SCOTCH_Arch * subarch)36 static int build_subarch(SCOTCH_Arch *scotch, NETLOC_int num_nodes, NETLOC_int *node_list,
37         SCOTCH_Arch *subarch)
38 {
39     int ret;
40 
41     /* Hack to avoid problem with unsorted node list in the subarch and scotch
42      * FIXME TODO */
43     qsort(node_list, num_nodes, sizeof(*node_list), compareint);
44 
45     ret = SCOTCH_archSub(subarch, scotch, num_nodes, node_list);
46     if (ret != 0) {
47         fprintf(stderr, "Error: SCOTCH_archSub failed\n");
48     }
49 
50     return ret;
51 }
52 
53 /* Convert a netloc tree to a scotch tleaf architecture */
arch_tree_to_scotch_arch(netloc_arch_tree_t * tree,SCOTCH_Arch * scotch)54 int arch_tree_to_scotch_arch(netloc_arch_tree_t *tree, SCOTCH_Arch *scotch)
55 {
56     int ret;
57 
58     ret = SCOTCH_archTleaf(scotch, tree->num_levels, tree->degrees, tree->cost);
59     if (ret != 0) {
60         fprintf(stderr, "Error: SCOTCH_archTleaf failed\n");
61         return NETLOC_ERROR;
62     }
63 
64     return NETLOC_SUCCESS;
65 }
66 
build_subgraph(SCOTCH_Graph * graph,int * vertices,int num_vertices,SCOTCH_Graph * nodegraph)67 static int build_subgraph(SCOTCH_Graph *graph, int *vertices, int num_vertices,
68         SCOTCH_Graph *nodegraph)
69 {
70     int ret;
71 
72     SCOTCH_Num base;       /* Base value               */
73     SCOTCH_Num vert;       /* Number of vertices       */
74     SCOTCH_Num *verttab;   /* Vertex array [vertnbr+1] */
75     SCOTCH_Num *vendtab;   /* Vertex array [vertnbr]   */
76     SCOTCH_Num *velotab;   /* Vertex load array        */
77     SCOTCH_Num *vlbltab;   /* Vertex label array       */
78     SCOTCH_Num edge;       /* Number of edges (arcs)   */
79     SCOTCH_Num *edgetab;   /* Edge array [edgenbr]     */
80     SCOTCH_Num *edlotab;   /* Edge load array          */
81 
82     SCOTCH_graphData(graph, &base, &vert, &verttab, &vendtab, &velotab,
83             &vlbltab, &edge, &edgetab, &edlotab);
84 
85     int *vertex_is_present = (int *)malloc(vert*sizeof(int));
86     for (int v = 0; v < vert; v++) {
87         vertex_is_present[v] = -1;
88     }
89     for (int v = 0; v < num_vertices; v++) {
90         vertex_is_present[vertices[v]] = v;
91     }
92 
93     // TODO handle other cases
94     if (vendtab) {
95         for (int i = 0; i < vert; i++) {
96             assert(vendtab[i] == verttab[i+1]);
97         }
98     }
99 
100     SCOTCH_Num *new_verttab;   /* Vertex array [vertnbr+1] */
101     SCOTCH_Num *new_vendtab;   /* Vertex array [vertnbr]   */
102     SCOTCH_Num *new_velotab;   /* Vertex load array        */
103     SCOTCH_Num *new_vlbltab;   /* Vertex label array       */
104     SCOTCH_Num new_edge;       /* Number of edges (arcs)   */
105     SCOTCH_Num *new_edgetab;   /* Edge array [edgenbr]     */
106     SCOTCH_Num *new_edlotab;   /* Edge load array          */
107 
108     new_verttab = (SCOTCH_Num *)malloc((num_vertices+1)*sizeof(SCOTCH_Num));
109     new_vendtab = NULL;
110     if (velotab)
111         new_velotab = (SCOTCH_Num *)malloc(num_vertices*sizeof(SCOTCH_Num));
112     else
113         new_velotab = NULL;
114     if (vlbltab)
115         new_vlbltab = (SCOTCH_Num *)malloc(num_vertices*sizeof(SCOTCH_Num));
116     else
117         new_vlbltab = NULL;
118 
119     new_edgetab = (SCOTCH_Num *)malloc(edge*sizeof(SCOTCH_Num));
120     new_edlotab = (SCOTCH_Num *)malloc(edge*sizeof(SCOTCH_Num));
121 
122     int edge_idx = 0;
123     new_verttab[0] = 0;
124     for (int v = 0; v < num_vertices; v++) {
125         if (velotab)
126             new_velotab[v] = velotab[vertices[v]];
127         if (vlbltab)
128             new_vlbltab[v] = vlbltab[vertices[v]];
129 
130         for (int e = verttab[vertices[v]]; e < verttab[vertices[v]+1]; e++) {
131             int dest_vertex = edgetab[e];
132             int new_dest = vertex_is_present[dest_vertex];
133             if (new_dest != -1) {
134                 new_edgetab[edge_idx] = new_dest;
135                 new_edlotab[edge_idx] = edlotab[e];
136                 edge_idx++;
137             }
138         }
139         new_verttab[v+1] = edge_idx;
140     }
141 
142     new_edge = edge_idx;
143 
144     SCOTCH_Num *old_edgetab = new_edgetab;
145     new_edgetab = (SCOTCH_Num *)
146         realloc(new_edgetab, new_edge*sizeof(SCOTCH_Num));
147     if (!new_edgetab) {
148         new_edgetab = old_edgetab;
149     }
150 
151     SCOTCH_Num *old_edlotab = new_edlotab;
152     new_edlotab = (SCOTCH_Num *)
153         realloc(new_edlotab, new_edge*sizeof(SCOTCH_Num));
154     if (!new_edlotab) {
155         new_edlotab = old_edlotab;
156     }
157 
158     ret = SCOTCH_graphBuild (nodegraph, base, num_vertices,
159                 new_verttab, new_vendtab, new_velotab, new_vlbltab,
160                 new_edge, new_edgetab, new_edlotab);
161 
162     free(vertex_is_present);
163 
164     return ret;
165 }
166 
build_current_arch(SCOTCH_Arch * scotch_arch,SCOTCH_Arch * scotch_subarch,netloc_arch_t * arch)167 static int build_current_arch(SCOTCH_Arch *scotch_arch,
168         SCOTCH_Arch *scotch_subarch, netloc_arch_t *arch)
169 {
170     int ret;
171     /* First we need to get the topology of the whole machine */
172     ret = netloc_arch_build(arch, 1);
173     if( NETLOC_SUCCESS != ret ) {
174         return ret;
175     }
176 
177     if (scotch_subarch) {
178         /* Set the current nodes and slots in the arch */
179         ret = netloc_arch_set_current_resources(arch);
180     } else {
181         ret = netloc_arch_set_global_resources(arch);
182     }
183 
184     if( NETLOC_SUCCESS != ret ) {
185         return ret;
186     }
187 
188     SCOTCH_archInit(scotch_arch);
189     ret = arch_tree_to_scotch_arch(arch->arch.global_tree, scotch_arch);
190     if (NETLOC_SUCCESS != ret) {
191         return ret;
192     }
193 
194     if (scotch_subarch) {
195         /* Now we can build the sub architecture */
196         SCOTCH_archInit(scotch_subarch);
197         ret = build_subarch(scotch_arch, arch->num_current_hosts,
198                 arch->current_hosts, scotch_subarch);
199     }
200 
201     return ret;
202 }
203 
netlocscotch_build_global_arch(SCOTCH_Arch * arch)204 int netlocscotch_build_global_arch(SCOTCH_Arch *arch)
205 {
206     int ret;
207     netloc_arch_t *netloc_arch = netloc_arch_construct();
208     ret = build_current_arch(arch, NULL, netloc_arch);
209 
210     netloc_arch_destruct(netloc_arch);
211     return ret;
212 }
213 
netlocscotch_build_current_arch(SCOTCH_Arch * arch,SCOTCH_Arch * subarch)214 int netlocscotch_build_current_arch(SCOTCH_Arch *arch, SCOTCH_Arch *subarch)
215 {
216     int ret;
217     netloc_arch_t *netloc_arch = netloc_arch_construct();
218     ret = build_current_arch(arch, subarch, netloc_arch);
219 
220     if (ret == NETLOC_SUCCESS)
221         netloc_arch_destruct(netloc_arch);
222 
223     return ret;
224 }
225 
netlocscotch_get_mapping_from_graph(SCOTCH_Graph * graph,netlocscotch_core_t ** pcores)226 int netlocscotch_get_mapping_from_graph(SCOTCH_Graph *graph,
227         netlocscotch_core_t **pcores)
228 {
229     int ret;
230 
231     SCOTCH_Arch scotch_arch;
232     SCOTCH_Arch scotch_subarch;
233     netlocscotch_core_t *cores = NULL;
234     netloc_arch_t *arch = netloc_arch_construct();
235     ret = build_current_arch(&scotch_arch, &scotch_subarch, arch);
236     if (NETLOC_SUCCESS != ret) {
237         netloc_arch_destruct(arch);
238         return ret;
239     }
240 
241     NETLOC_int graph_size;
242     SCOTCH_graphSize(graph, &graph_size, NULL);
243 
244     int num_hosts = arch->num_current_hosts;
245 
246     SCOTCH_Strat strategy;
247     SCOTCH_stratInit(&strategy);
248     /* We force Scotch to use all the processes
249      * barat is 0.01 as in SCOTCH_STRATDEFAULT */
250     SCOTCH_stratGraphMapBuild(&strategy, SCOTCH_STRATQUALITY, graph_size, 0.01);
251 
252     /* The ranks are the indices of the nodes in the complete graph */
253     NETLOC_int *ranks = (NETLOC_int *)malloc(graph_size*sizeof(NETLOC_int));
254     ret = SCOTCH_graphMap(graph, &scotch_subarch, &strategy, ranks);
255 
256     SCOTCH_stratExit(&strategy);
257 
258     SCOTCH_archExit(&scotch_subarch);
259     SCOTCH_archExit(&scotch_arch);
260 
261     if (ret != 0) {
262         fprintf(stderr, "Error: SCOTCH_graphMap failed\n");
263         goto ERROR;
264     }
265 
266     cores = (netlocscotch_core_t *)
267         malloc(graph_size*sizeof(netlocscotch_core_t));
268     if (!arch->has_slots) {
269         /* We have the mapping but only for the nodes, not inside the nodes */
270 
271         UT_array *process_by_node[num_hosts];
272         for (int n = 0; n < num_hosts; n++) {
273             utarray_new(process_by_node[n], &ut_int_icd);
274         }
275 
276         /* Find the processes mapped to the nodes */
277         for (int p = 0; p < graph_size; p++) {
278             int rank = ranks[p];
279             if (rank >= num_hosts || rank < 0) {
280                 ret = NETLOC_ERROR;
281                 goto ERROR;
282             }
283             utarray_push_back(process_by_node[rank], &p);
284         }
285 
286         /* Use the intranode topology */
287         for (int n = 0; n < num_hosts; n++) {
288             int *process_list = (int *)process_by_node[n]->d;
289             int num_processes = utarray_len(process_by_node[n]);
290             netloc_arch_node_t *node =
291                 arch->node_slot_by_idx[arch->current_hosts[n]].node;
292             NETLOC_int node_ranks[num_processes];
293 
294             /* We need to extract the subgraph with only the vertices mapped to the
295              * current node */
296             SCOTCH_Graph nodegraph; /* graph with only elements for node n */
297             build_subgraph(graph, process_list, num_processes, &nodegraph);
298 
299             /* Build the scotch arch of the all node */
300             SCOTCH_Arch scotch_nodearch;
301             ret = arch_tree_to_scotch_arch(node->slot_tree, &scotch_nodearch);
302             if (NETLOC_SUCCESS != ret) {
303                 goto ERROR;
304             }
305 
306             /* Restrict the scotch arch to the available cores */
307             SCOTCH_Arch scotch_nodesubarch;
308             ret = build_subarch(&scotch_nodearch, node->num_current_slots,
309                     node->current_slots, &scotch_nodesubarch);
310             if (NETLOC_SUCCESS != ret) {
311                 goto ERROR;
312             }
313 
314             /* Find the mapping to the cores */
315             ret = SCOTCH_graphMap(&nodegraph, &scotch_nodesubarch, &strategy, node_ranks);
316             if (ret != 0) {
317                 fprintf(stderr, "Error: SCOTCH_graphMap failed\n");
318                 goto ERROR;
319             }
320 
321             /* Report the node ranks in the global rank array */
322             for (int p = 0; p < num_processes; p++) {
323                 int process = process_list[p];
324                 int arch_idx = node->current_slots[node_ranks[p]];
325                 cores[process].core = node->slot_os_idx[arch_idx];
326                 cores[process].nodename = strdup(node->node->hostname);
327                 cores[process].rank = node->slot_ranks[node_ranks[p]];
328             }
329         }
330         for (int n = 0; n < num_hosts; n++) {
331             utarray_free(process_by_node[n]);
332         }
333     } else {
334         for (int p = 0; p < graph_size; p++) {
335             int host_idx = arch->current_hosts[ranks[p]];
336             netloc_arch_node_t *node = arch->node_slot_by_idx[host_idx].node;
337             int slot_rank = arch->node_slot_by_idx[host_idx].slot;
338             cores[p].nodename = strdup(node->node->hostname);
339             cores[p].core = node->slot_os_idx[node->slot_idx[slot_rank]];
340             cores[p].rank = node->slot_ranks[node->slot_idx[slot_rank]];
341         }
342     }
343 
344     *pcores = cores;
345 
346 ERROR:
347     free(ranks);
348     netloc_arch_destruct(arch);
349     if (ret == NETLOC_SUCCESS)
350         return ret;
351     free(cores);
352     return ret;
353 }
354 
netlocscotch_get_mapping_from_comm_matrix(double ** comm,int num_vertices,netlocscotch_core_t ** pcores)355 int netlocscotch_get_mapping_from_comm_matrix(double **comm, int num_vertices,
356         netlocscotch_core_t **pcores)
357 {
358     int ret;
359 
360     SCOTCH_Graph graph;
361     ret = comm_matrix_to_scotch_graph(comm, num_vertices, &graph);
362     if (NETLOC_SUCCESS != ret) {
363         return ret;
364     }
365 
366     ret = netlocscotch_get_mapping_from_graph(&graph, pcores);
367 
368     /* Free arrays */
369     {
370         SCOTCH_Num base;       /* Base value               */
371         SCOTCH_Num vert;       /* Number of vertices       */
372         SCOTCH_Num *verttab;   /* Vertex array [vertnbr+1] */
373         SCOTCH_Num *vendtab;   /* Vertex array [vertnbr]   */
374         SCOTCH_Num *velotab;   /* Vertex load array        */
375         SCOTCH_Num *vlbltab;   /* Vertex label array       */
376         SCOTCH_Num edge;       /* Number of edges (arcs)   */
377         SCOTCH_Num *edgetab;   /* Edge array [edgenbr]     */
378         SCOTCH_Num *edlotab;   /* Edge load array          */
379 
380         SCOTCH_graphData(&graph, &base, &vert, &verttab, &vendtab, &velotab,
381                 &vlbltab, &edge, &edgetab, &edlotab);
382         free(edlotab);
383         free(edgetab);
384         free(verttab);
385         SCOTCH_graphExit(&graph);
386     }
387 
388     return ret;
389 }
390 
netlocscotch_get_mapping_from_comm_file(char * filename,int * pnum_processes,netlocscotch_core_t ** pcores)391 int netlocscotch_get_mapping_from_comm_file(char *filename, int *pnum_processes,
392         netlocscotch_core_t **pcores)
393 {
394     int ret;
395     int n;
396     double **mat;
397 
398     ret = netloc_build_comm_mat(filename, &n, &mat);
399 
400     if (ret != NETLOC_SUCCESS) {
401         return ret;
402     }
403 
404     *pnum_processes = n;
405 
406     ret = netlocscotch_get_mapping_from_comm_matrix(mat, n, pcores);
407 
408     free(mat[0]);
409     free(mat);
410 
411     return ret;
412 }
413 
comm_matrix_to_scotch_graph(double ** matrix,int n,SCOTCH_Graph * graph)414 static int comm_matrix_to_scotch_graph(double **matrix, int n, SCOTCH_Graph *graph)
415 {
416     int ret;
417 
418     SCOTCH_Num base;       /* Base value               */
419     SCOTCH_Num vert;       /* Number of vertices       */
420     SCOTCH_Num *verttab;   /* Vertex array [vertnbr+1] */
421     SCOTCH_Num *vendtab;   /* Vertex array [vertnbr]   */
422     SCOTCH_Num *velotab;   /* Vertex load array        */
423     SCOTCH_Num *vlbltab;   /* Vertex label array       */
424     SCOTCH_Num edge;       /* Number of edges (arcs)   */
425     SCOTCH_Num *edgetab;   /* Edge array [edgenbr]     */
426     SCOTCH_Num *edlotab;   /* Edge load array          */
427 
428     base = 0;
429     vert = n;
430 
431     verttab = (SCOTCH_Num *)malloc((vert+1)*sizeof(SCOTCH_Num));
432     for (int v = 0; v < vert+1; v++) {
433         verttab[v] = v*(n-1);
434     }
435 
436     vendtab = NULL;
437     velotab = NULL;
438     vlbltab = NULL;
439 
440     edge = n*(n-1);
441 
442     /* Compute the lowest load to reduce of the values of the load to avoid overflow */
443     double min_load = -1;
444     for (int v1 = 0; v1 < vert; v1++) {
445         for (int v2 = 0; v2 < vert; v2++) {
446             double load = matrix[v1][v2];
447             if (load >= 0.01 && (load < min_load || min_load < 0)) /* TODO set an epsilon */
448                 min_load = load;
449         }
450     }
451 
452     edgetab = (SCOTCH_Num *)malloc(n*(n-1)*sizeof(SCOTCH_Num));
453     edlotab = (SCOTCH_Num *)malloc(n*(n-1)*sizeof(SCOTCH_Num));
454     for (int v1 = 0; v1 < vert; v1++) {
455         for (int v2 = 0; v2 < vert; v2++) {
456             if (v2 == v1)
457                 continue;
458             int idx = v1*(n-1)+((v2 < v1) ? v2: v2-1);
459             edgetab[idx] = v2;
460             edlotab[idx] = (int)(matrix[v1][v2]/min_load);
461         }
462     }
463 
464     ret = SCOTCH_graphBuild(graph, base, vert,
465             verttab, vendtab, velotab, vlbltab, edge, edgetab, edlotab);
466 
467     return ret;
468 }
469 
470