1 /*****************************************************************************\
2  *  src/common/mapping.c - routines for compact process mapping representation
3  *****************************************************************************
4  *  Copyright (C) 2014 Institute of Semiconductor Physics
5  *                     Siberian Branch of Russian Academy of Science
6  *  Written by Artem Polyakov <artpol84@gmail.com>.
7  *  All rights reserved.
8  *
9  *  This file is part of Slurm, a resource management program.
10  *  For details, see <https://slurm.schedmd.com/>.
11  *  Please also read the included file: DISCLAIMER.
12  *
13  *  Slurm is free software; you can redistribute it and/or modify it under
14  *  the terms of the GNU General Public License as published by the Free
15  *  Software Foundation; either version 2 of the License, or (at your option)
16  *  any later version.
17  *
18  *  In addition, as a special exception, the copyright holders give permission
19  *  to link the code of portions of this program with the OpenSSL library under
20  *  certain conditions as described in each individual source file, and
21  *  distribute linked combinations including the two. You must obey the GNU
22  *  General Public License in all respects for all of the code used other than
23  *  OpenSSL. If you modify file(s) with this exception, you may extend this
24  *  exception to your version of the file(s), but you are not obligated to do
25  *  so. If you do not wish to do so, delete this exception statement from your
26  *  version.  If you delete this exception statement from all source files in
27  *  the program, then also delete it here.
28  *
29  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
30  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
32  *  details.
33  *
34  *  You should have received a copy of the GNU General Public License along
35  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
36  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
37 \*****************************************************************************/
38 
39 #define _GNU_SOURCE
40 
41 #include "src/common/mapping.h"
42 
_dump_config(uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks,uint32_t ** tids,int offset)43 static void _dump_config(uint32_t node_cnt, uint32_t task_cnt,
44 			 uint16_t *tasks, uint32_t **tids, int offset)
45 {
46 	int i, j;
47 
48 	error("%s: Unable to find task offset %d", __func__, offset);
49 	for (i = 0; i < node_cnt; i++) {
50 		for (j = 0; j < tasks[i]; j++) {
51 			error("TIDS[%d][%d]:%u", i, j, tids[i][j]);
52 		}
53 	}
54 }
55 
56 /*
57  * pack_process_mapping()
58  */
59 char *
pack_process_mapping(uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks,uint32_t ** tids)60 pack_process_mapping(uint32_t node_cnt,
61 		     uint32_t task_cnt,
62 		     uint16_t *tasks,
63 		     uint32_t **tids)
64 {
65 	int offset, i;
66 	int start_node, end_node;
67 	char *packing = NULL;
68 
69 	/*
70 	 * next_task[i] - next process for processing
71 	 */
72 	uint16_t *next_task = xmalloc(node_cnt * sizeof(uint16_t));
73 
74 	packing = xstrdup("(vector");
75 	offset = 0;
76 	while (offset < task_cnt) {
77 		int mapped = 0;
78 		int depth = -1;
79 		int j;
80 		start_node = end_node = 0;
81 
82 		/* find the task with id == offset */
83 		for (i = 0; i < node_cnt; i++) {
84 
85 			if (next_task[i] < tasks[i]) {
86 				/*
87 				 * if we didn't consume entire
88 				 * quota on this node
89 				 */
90 				if (offset > tids[i][next_task[i]]) {
91 					_dump_config(node_cnt, task_cnt,
92 						     tasks, tids, offset);
93 					abort();
94 				}
95 				if (offset == tids[i][next_task[i]]) {
96 					start_node = i;
97 					break;
98 				}
99 			}
100 		}
101 
102 		end_node = node_cnt;
103 		for (i = start_node; i < end_node; i++) {
104 			if (next_task[i] >= tasks[i] ) {
105 				/*
106 				 * Save first non-matching node index
107 				 * and interrupt loop
108 				 */
109 				end_node = i;
110 				continue;
111 			}
112 
113 			for (j = next_task[i]; ((j + 1) < tasks[i])
114 				     && ((tids[i][j]+1) == tids[i][j+1]); j++);
115 			j++;
116 			/*
117 			 * First run determines the depth
118 			 */
119 			if (depth < 0) {
120 				depth = j - next_task[i];
121 			} else {
122 				/*
123 				 * If this is not the first node in the bar
124 				 * check that: 1. First tid on this node is
125 				 * sequentially next after last tid
126 				 *    on the previous node
127 				 */
128 				if (tids[i-1][next_task[i-1]-1] + 1
129 				    != tids[i][next_task[i]]) {
130 					end_node = i;
131 					continue;
132 				}
133 			}
134 
135 			if (depth == (j - next_task[i])) {
136 				mapped += depth;
137 				next_task[i] = j;
138 			} else {
139 				/*
140 				 * Save first non-matching node index
141 				 * and interrupt loop
142 				 */
143 				end_node = i;
144 			}
145 		}
146 		xstrfmtcat(packing,",(%u,%u,%u)",
147 			   start_node, end_node - start_node, depth);
148 		offset += mapped;
149 	}
150 	xfree(next_task);
151 	xstrcat(packing,")");
152 	return packing;
153 }
154 
155 uint32_t *
unpack_process_mapping_flat(char * map,uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks)156 unpack_process_mapping_flat(char *map,
157 			    uint32_t node_cnt,
158 			    uint32_t task_cnt,
159 			    uint16_t *tasks)
160 {
161 	/*
162 	 * Start from the flat array. For i'th task is located
163 	 * on the task_map[i]'th node
164 	 */
165 	uint32_t *task_map = xmalloc(sizeof(int) * task_cnt);
166 	char *prefix = "(vector,", *p = NULL;
167 	uint32_t taskid, i;
168 
169 	if (tasks) {
170 		for (i = 0; i < node_cnt; i++) {
171 			tasks[i] = 0;
172 		}
173 	}
174 
175 	if ((p = strstr(map, prefix)) == NULL) {
176 		error("\
177 unpack_process_mapping: The mapping string should start from %s", prefix);
178 		goto err_exit;
179 	}
180 
181 	/*
182 	 * Skip prefix
183 	 */
184 	p += strlen(prefix);
185 	taskid = 0;
186 	while ((p = strchr(p,'('))) {
187 		int depth, node, end_node;
188 		p++;
189 		if (3!= sscanf(p,"%d,%d,%d", &node, &end_node, &depth)) {
190 			goto err_exit;
191 		}
192 		end_node += node;
193 		xassert(node < node_cnt && end_node <= node_cnt );
194 		for (; node < end_node; node++) {
195 			for (i = 0; i < depth; i++){
196 				task_map[taskid++] = node;
197 				if (tasks != NULL) {
198 					/*
199 					 * Cont tasks on each node if was
200 					 * requested
201 					 */
202 					tasks[node]++;
203 				}
204 			}
205 		}
206 	}
207 	return task_map;
208 err_exit:
209 	xfree(task_map);
210 	return NULL;
211 }
212 
213 int
unpack_process_mapping(char * map,uint32_t node_cnt,uint32_t task_cnt,uint16_t * tasks,uint32_t ** tids)214 unpack_process_mapping(char *map,
215 		       uint32_t node_cnt,
216 		       uint32_t task_cnt,
217 		       uint16_t *tasks,
218 		       uint32_t **tids)
219 {
220 	/*
221 	 * Start from the flat array. For i'th task is located
222 	 * on the task_map[i]'th node
223 	 */
224 	uint32_t *task_map = NULL;
225 	uint16_t *node_task_cnt = NULL;
226 	uint32_t i;
227 	int rc = 0;
228 
229 	task_map = unpack_process_mapping_flat(map, node_cnt, task_cnt, tasks);
230 	if (task_map == NULL) {
231 		rc = SLURM_ERROR;
232 		goto err_exit;
233 	}
234 
235 	node_task_cnt = xmalloc(sizeof(uint16_t) * node_cnt);
236 	for (i = 0;  i < node_cnt; i++){
237 		tids[i] = xmalloc(sizeof(uint32_t) * tasks[i]);
238 		node_task_cnt[i] = 0;
239 	}
240 
241 	for (i = 0; i < task_cnt; i++) {
242 		uint32_t node = task_map[i];
243 		tids[node][ node_task_cnt[node]++ ] = i;
244 		xassert( node_task_cnt[node] <= tasks[node] );
245 	}
246 
247 	goto exit;
248 err_exit:
249 	error("unpack_process_mapping: bad mapping format");
250 exit:
251 	if (task_map != NULL){
252 		xfree(task_map);
253 	}
254 
255 	if (node_task_cnt != NULL){
256 		xfree(node_task_cnt);
257 	}
258 	return rc;
259 }
260 
261 
262 #if 0
263 
264 /*
265  * Mutual check for both routines
266  */
267 
268 /*
269  * Emulate 16-core nodes
270  */
271 #define NCPUS 16
272 #define NODES 200
273 
274 static
275 void block_distr(uint32_t task_cnt,
276 		 uint16_t *tasks,
277 		 uint32_t **tids)
278 {
279 	int i, j, tnum = 0;
280 
281 	for (i = 0; i < NODES; i++) {
282 		tasks[i] = 0;
283 	}
284 
285 	/* BLOCK distribution
286 	 */
287 	for (i = 0; (i < NODES) && (tnum < task_cnt); i++) {
288 		for (j = 0; j < NCPUS && (tnum < task_cnt); j++) {
289 			tids[i][j] = tnum++;
290 		}
291 		tasks[i] = j;
292 	}
293 }
294 
295 static void
296 cyclic_distr(uint32_t task_cnt,
297 	     uint16_t *tasks,
298 	     uint32_t **tids)
299 {
300 	int i, j, tnum = 0;
301 	/* CYCLIC distribution
302 	 */
303 	tnum = 0;
304 	for (i = 0; i < NODES; i++) {
305 		tasks[i] = 0;
306 	}
307 	for (j = 0; j < NCPUS && (tnum < task_cnt); j++) {
308 		for (i = 0; (i < NODES) && (tnum < task_cnt); i++ ) {
309 			tids[i][j] = tnum++;
310 			tasks[i]++;
311 		}
312 	}
313 }
314 
315 
316 static void
317 plane_distr(uint32_t task_cnt,
318 	    int plane_factor,
319 	    uint16_t *tasks,
320 	    uint32_t **tids)
321 {
322 	int i, j, tnum = 0;
323 	/* PLANE distribution
324 	 */
325 	tnum = 0;
326 	for (i = 0; i < NODES; i++) {
327 		tasks[i] = 0;
328 	}
329 
330 	while (tnum < task_cnt) {
331 		for (i = 0; (i < NODES) && (tnum < task_cnt); i++) {
332 			for (j = 0;
333 			    (j < plane_factor)
334 				    && (tasks[i] < NCPUS)
335 				    && (tnum < task_cnt); j++) {
336 				tids[i][tasks[i]++] = tnum++;
337 			}
338 		}
339 	}
340 }
341 
342 static void check(uint32_t node_cnt, uint32_t task_cnt,
343 		  uint16_t *tasks, uint32_t **tids)
344 {
345 	uint16_t *new_tasks;
346 	uint32_t **new_tids;
347 	char *map = pack_process_mapping(node_cnt, task_cnt, tasks, tids);
348 	int i,j;
349 
350 	printf("mapping: %s\n", map);
351 
352 	new_tasks = xmalloc(sizeof(uint16_t) * node_cnt);
353 	new_tids = xmalloc(sizeof(uint32_t *) * node_cnt);
354 	unpack_process_mapping(map,node_cnt,task_cnt,new_tasks,new_tids);
355 
356 	for (i = 0; i < node_cnt; i++) {
357 
358 		if (new_tasks[i] != tasks[i]) {
359 			printf("Task count mismatch on node %d\n", i);
360 			exit(0);
361 		}
362 
363 		for (j = 0; j< tasks[i]; j++) {
364 			if (new_tids[i][j] != tids[i][j]){
365 				printf("\
366 Task id mismatch on node %d, idx = %d\n", i, j);
367 				exit(0);
368 			}
369 		}
370 	}
371 
372 	for (i = 0; i< node_cnt; i++) {
373 		xfree(new_tids[i]);
374 	}
375 	xfree(new_tasks);
376 	xfree(new_tids);
377 
378 	xfree(map);
379 
380 }
381 
382 
383 int
384 main(int argc, char **argv)
385 {
386 	uint16_t  tasks[NODES] = { 0 };
387 	uint32_t **tids = NULL;
388 	int tnum = 0, i;
389 
390 	tids = xmalloc(sizeof(uint32_t*) * NODES);
391 	for (i = 0; i< NODES; i++) {
392 		tids[i] = xmalloc(sizeof(uint32_t) * NCPUS);
393 	}
394 
395 	for (tnum = 1; tnum < NCPUS*NODES; tnum++) {
396 
397 		printf("Map %d tasks into cluster %dx%d\n", tnum, NODES, NCPUS);
398 		block_distr(tnum, tasks, tids);
399 		check(NODES,tnum, tasks, tids);
400 
401 		cyclic_distr(tnum, tasks, tids);
402 		check(NODES,tnum, tasks, tids);
403 
404 		plane_distr(tnum,2,tasks, tids);
405 		check(NODES,tnum, tasks, tids);
406 
407 		plane_distr(tnum,4,tasks, tids);
408 		check(NODES,tnum, tasks, tids);
409 
410 		plane_distr(tnum,6,tasks, tids);
411 		check(NODES,tnum, tasks, tids);
412 
413 		plane_distr(tnum,8,tasks, tids);
414 		check(NODES,tnum, tasks, tids);
415 	}
416 
417 	for (i = 0; i < NODES; i++){
418 		xfree(tids[i]);
419 	}
420 	xfree(tids);
421 
422 	return 0;
423 }
424 
425 #endif
426