1 /*****************************************************************************\
2 * src/common/mapping.c - routines for compact process mapping representation
3 *****************************************************************************
4 * Copyright (C) 2014 Institute of Semiconductor Physics
5 * Siberian Branch of Russian Academy of Science
6 * Written by Artem Polyakov <artpol84@gmail.com>.
7 * All rights reserved.
8 *
9 * This file is part of Slurm, a resource management program.
10 * For details, see <https://slurm.schedmd.com/>.
11 * Please also read the included file: DISCLAIMER.
12 *
13 * Slurm is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option)
16 * any later version.
17 *
18 * In addition, as a special exception, the copyright holders give permission
19 * to link the code of portions of this program with the OpenSSL library under
20 * certain conditions as described in each individual source file, and
21 * distribute linked combinations including the two. You must obey the GNU
22 * General Public License in all respects for all of the code used other than
23 * OpenSSL. If you modify file(s) with this exception, you may extend this
24 * exception to your version of the file(s), but you are not obligated to do
25 * so. If you do not wish to do so, delete this exception statement from your
26 * version. If you delete this exception statement from all source files in
27 * the program, then also delete it here.
28 *
29 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
30 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
32 * details.
33 *
34 * You should have received a copy of the GNU General Public License along
35 * with Slurm; if not, write to the Free Software Foundation, Inc.,
36 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
37 \*****************************************************************************/
38
39 #define _GNU_SOURCE
40
41 #include "src/common/mapping.h"
42
_dump_config(uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks,uint32_t ** tids,int offset)43 static void _dump_config(uint32_t node_cnt, uint32_t task_cnt,
44 uint16_t *tasks, uint32_t **tids, int offset)
45 {
46 int i, j;
47
48 error("%s: Unable to find task offset %d", __func__, offset);
49 for (i = 0; i < node_cnt; i++) {
50 for (j = 0; j < tasks[i]; j++) {
51 error("TIDS[%d][%d]:%u", i, j, tids[i][j]);
52 }
53 }
54 }
55
56 /*
57 * pack_process_mapping()
58 */
59 char *
pack_process_mapping(uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks,uint32_t ** tids)60 pack_process_mapping(uint32_t node_cnt,
61 uint32_t task_cnt,
62 uint16_t *tasks,
63 uint32_t **tids)
64 {
65 int offset, i;
66 int start_node, end_node;
67 char *packing = NULL;
68
69 /*
70 * next_task[i] - next process for processing
71 */
72 uint16_t *next_task = xmalloc(node_cnt * sizeof(uint16_t));
73
74 packing = xstrdup("(vector");
75 offset = 0;
76 while (offset < task_cnt) {
77 int mapped = 0;
78 int depth = -1;
79 int j;
80 start_node = end_node = 0;
81
82 /* find the task with id == offset */
83 for (i = 0; i < node_cnt; i++) {
84
85 if (next_task[i] < tasks[i]) {
86 /*
87 * if we didn't consume entire
88 * quota on this node
89 */
90 if (offset > tids[i][next_task[i]]) {
91 _dump_config(node_cnt, task_cnt,
92 tasks, tids, offset);
93 abort();
94 }
95 if (offset == tids[i][next_task[i]]) {
96 start_node = i;
97 break;
98 }
99 }
100 }
101
102 end_node = node_cnt;
103 for (i = start_node; i < end_node; i++) {
104 if (next_task[i] >= tasks[i] ) {
105 /*
106 * Save first non-matching node index
107 * and interrupt loop
108 */
109 end_node = i;
110 continue;
111 }
112
113 for (j = next_task[i]; ((j + 1) < tasks[i])
114 && ((tids[i][j]+1) == tids[i][j+1]); j++);
115 j++;
116 /*
117 * First run determines the depth
118 */
119 if (depth < 0) {
120 depth = j - next_task[i];
121 } else {
122 /*
123 * If this is not the first node in the bar
124 * check that: 1. First tid on this node is
125 * sequentially next after last tid
126 * on the previous node
127 */
128 if (tids[i-1][next_task[i-1]-1] + 1
129 != tids[i][next_task[i]]) {
130 end_node = i;
131 continue;
132 }
133 }
134
135 if (depth == (j - next_task[i])) {
136 mapped += depth;
137 next_task[i] = j;
138 } else {
139 /*
140 * Save first non-matching node index
141 * and interrupt loop
142 */
143 end_node = i;
144 }
145 }
146 xstrfmtcat(packing,",(%u,%u,%u)",
147 start_node, end_node - start_node, depth);
148 offset += mapped;
149 }
150 xfree(next_task);
151 xstrcat(packing,")");
152 return packing;
153 }
154
155 uint32_t *
unpack_process_mapping_flat(char * map,uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks)156 unpack_process_mapping_flat(char *map,
157 uint32_t node_cnt,
158 uint32_t task_cnt,
159 uint16_t *tasks)
160 {
161 /*
162 * Start from the flat array. For i'th task is located
163 * on the task_map[i]'th node
164 */
165 uint32_t *task_map = xmalloc(sizeof(int) * task_cnt);
166 char *prefix = "(vector,", *p = NULL;
167 uint32_t taskid, i;
168
169 if (tasks) {
170 for (i = 0; i < node_cnt; i++) {
171 tasks[i] = 0;
172 }
173 }
174
175 if ((p = strstr(map, prefix)) == NULL) {
176 error("\
177 unpack_process_mapping: The mapping string should start from %s", prefix);
178 goto err_exit;
179 }
180
181 /*
182 * Skip prefix
183 */
184 p += strlen(prefix);
185 taskid = 0;
186 while ((p = strchr(p,'('))) {
187 int depth, node, end_node;
188 p++;
189 if (3!= sscanf(p,"%d,%d,%d", &node, &end_node, &depth)) {
190 goto err_exit;
191 }
192 end_node += node;
193 xassert(node < node_cnt && end_node <= node_cnt );
194 for (; node < end_node; node++) {
195 for (i = 0; i < depth; i++){
196 task_map[taskid++] = node;
197 if (tasks != NULL) {
198 /*
199 * Cont tasks on each node if was
200 * requested
201 */
202 tasks[node]++;
203 }
204 }
205 }
206 }
207 return task_map;
208 err_exit:
209 xfree(task_map);
210 return NULL;
211 }
212
213 int
unpack_process_mapping(char * map,uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks,uint32_t ** tids)214 unpack_process_mapping(char *map,
215 uint32_t node_cnt,
216 uint32_t task_cnt,
217 uint16_t *tasks,
218 uint32_t **tids)
219 {
220 /*
221 * Start from the flat array. For i'th task is located
222 * on the task_map[i]'th node
223 */
224 uint32_t *task_map = NULL;
225 uint16_t *node_task_cnt = NULL;
226 uint32_t i;
227 int rc = 0;
228
229 task_map = unpack_process_mapping_flat(map, node_cnt, task_cnt, tasks);
230 if (task_map == NULL) {
231 rc = SLURM_ERROR;
232 goto err_exit;
233 }
234
235 node_task_cnt = xmalloc(sizeof(uint16_t) * node_cnt);
236 for (i = 0; i < node_cnt; i++){
237 tids[i] = xmalloc(sizeof(uint32_t) * tasks[i]);
238 node_task_cnt[i] = 0;
239 }
240
241 for (i = 0; i < task_cnt; i++) {
242 uint32_t node = task_map[i];
243 tids[node][ node_task_cnt[node]++ ] = i;
244 xassert( node_task_cnt[node] <= tasks[node] );
245 }
246
247 goto exit;
248 err_exit:
249 error("unpack_process_mapping: bad mapping format");
250 exit:
251 if (task_map != NULL){
252 xfree(task_map);
253 }
254
255 if (node_task_cnt != NULL){
256 xfree(node_task_cnt);
257 }
258 return rc;
259 }
260
261
262 #if 0
263
264 /*
265 * Mutual check for both routines
266 */
267
268 /*
269 * Emulate 16-core nodes
270 */
271 #define NCPUS 16
272 #define NODES 200
273
274 static
275 void block_distr(uint32_t task_cnt,
276 uint16_t *tasks,
277 uint32_t **tids)
278 {
279 int i, j, tnum = 0;
280
281 for (i = 0; i < NODES; i++) {
282 tasks[i] = 0;
283 }
284
285 /* BLOCK distribution
286 */
287 for (i = 0; (i < NODES) && (tnum < task_cnt); i++) {
288 for (j = 0; j < NCPUS && (tnum < task_cnt); j++) {
289 tids[i][j] = tnum++;
290 }
291 tasks[i] = j;
292 }
293 }
294
295 static void
296 cyclic_distr(uint32_t task_cnt,
297 uint16_t *tasks,
298 uint32_t **tids)
299 {
300 int i, j, tnum = 0;
301 /* CYCLIC distribution
302 */
303 tnum = 0;
304 for (i = 0; i < NODES; i++) {
305 tasks[i] = 0;
306 }
307 for (j = 0; j < NCPUS && (tnum < task_cnt); j++) {
308 for (i = 0; (i < NODES) && (tnum < task_cnt); i++ ) {
309 tids[i][j] = tnum++;
310 tasks[i]++;
311 }
312 }
313 }
314
315
316 static void
317 plane_distr(uint32_t task_cnt,
318 int plane_factor,
319 uint16_t *tasks,
320 uint32_t **tids)
321 {
322 int i, j, tnum = 0;
323 /* PLANE distribution
324 */
325 tnum = 0;
326 for (i = 0; i < NODES; i++) {
327 tasks[i] = 0;
328 }
329
330 while (tnum < task_cnt) {
331 for (i = 0; (i < NODES) && (tnum < task_cnt); i++) {
332 for (j = 0;
333 (j < plane_factor)
334 && (tasks[i] < NCPUS)
335 && (tnum < task_cnt); j++) {
336 tids[i][tasks[i]++] = tnum++;
337 }
338 }
339 }
340 }
341
342 static void check(uint32_t node_cnt, uint32_t task_cnt,
343 uint16_t *tasks, uint32_t **tids)
344 {
345 uint16_t *new_tasks;
346 uint32_t **new_tids;
347 char *map = pack_process_mapping(node_cnt, task_cnt, tasks, tids);
348 int i,j;
349
350 printf("mapping: %s\n", map);
351
352 new_tasks = xmalloc(sizeof(uint16_t) * node_cnt);
353 new_tids = xmalloc(sizeof(uint32_t *) * node_cnt);
354 unpack_process_mapping(map,node_cnt,task_cnt,new_tasks,new_tids);
355
356 for (i = 0; i < node_cnt; i++) {
357
358 if (new_tasks[i] != tasks[i]) {
359 printf("Task count mismatch on node %d\n", i);
360 exit(0);
361 }
362
363 for (j = 0; j< tasks[i]; j++) {
364 if (new_tids[i][j] != tids[i][j]){
365 printf("\
366 Task id mismatch on node %d, idx = %d\n", i, j);
367 exit(0);
368 }
369 }
370 }
371
372 for (i = 0; i< node_cnt; i++) {
373 xfree(new_tids[i]);
374 }
375 xfree(new_tasks);
376 xfree(new_tids);
377
378 xfree(map);
379
380 }
381
382
383 int
384 main(int argc, char **argv)
385 {
386 uint16_t tasks[NODES] = { 0 };
387 uint32_t **tids = NULL;
388 int tnum = 0, i;
389
390 tids = xmalloc(sizeof(uint32_t*) * NODES);
391 for (i = 0; i< NODES; i++) {
392 tids[i] = xmalloc(sizeof(uint32_t) * NCPUS);
393 }
394
395 for (tnum = 1; tnum < NCPUS*NODES; tnum++) {
396
397 printf("Map %d tasks into cluster %dx%d\n", tnum, NODES, NCPUS);
398 block_distr(tnum, tasks, tids);
399 check(NODES,tnum, tasks, tids);
400
401 cyclic_distr(tnum, tasks, tids);
402 check(NODES,tnum, tasks, tids);
403
404 plane_distr(tnum,2,tasks, tids);
405 check(NODES,tnum, tasks, tids);
406
407 plane_distr(tnum,4,tasks, tids);
408 check(NODES,tnum, tasks, tids);
409
410 plane_distr(tnum,6,tasks, tids);
411 check(NODES,tnum, tasks, tids);
412
413 plane_distr(tnum,8,tasks, tids);
414 check(NODES,tnum, tasks, tids);
415 }
416
417 for (i = 0; i < NODES; i++){
418 xfree(tids[i]);
419 }
420 xfree(tids);
421
422 return 0;
423 }
424
425 #endif
426