1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 2010-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 /*
22  * Description:	CPU topology and related functionality
23  *
24  * Author: 	Rickard Green
25  */
26 
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30 
31 #include <ctype.h>
32 
33 #include "global.h"
34 #include "error.h"
35 #include "bif.h"
36 #include "erl_cpu_topology.h"
37 #include "erl_flxctr.h"
38 
39 #define ERTS_MAX_READER_GROUPS 64
40 
41 /*
42  * Cpu topology hierarchy.
43  */
44 #define ERTS_TOPOLOGY_NODE		0
45 #define ERTS_TOPOLOGY_PROCESSOR		1
46 #define ERTS_TOPOLOGY_PROCESSOR_NODE	2
47 #define ERTS_TOPOLOGY_CORE		3
48 #define ERTS_TOPOLOGY_THREAD		4
49 #define ERTS_TOPOLOGY_LOGICAL		5
50 
51 #define ERTS_TOPOLOGY_MAX_DEPTH		6
52 
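/*
 * Binding state for each scheduler; scheduler2cpu_map below is indexed by
 * scheduler number (entry 0 is unused). 'bind_id' is the logical cpu the
 * scheduler should bind to and 'bound_id' the one it is currently bound
 * to; -1 means none.
 */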
53 typedef struct {
54     int bind_id;
55     int bound_id;
56 } ErtsCpuBindData;
57 
58 static erts_cpu_info_t *cpuinfo;
59 
60 static int max_main_threads;
61 static int reader_groups;
62 static int decentralized_counter_groups;
63 
64 static ErtsCpuBindData *scheduler2cpu_map;
65 static erts_rwmtx_t cpuinfo_rwmtx;
66 
67 typedef enum {
68     ERTS_CPU_BIND_UNDEFINED,
69     ERTS_CPU_BIND_SPREAD,
70     ERTS_CPU_BIND_PROCESSOR_SPREAD,
71     ERTS_CPU_BIND_THREAD_SPREAD,
72     ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
73     ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
74     ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
75     ERTS_CPU_BIND_NO_SPREAD,
76     ERTS_CPU_BIND_NONE
77 } ErtsCpuBindOrder;
78 
79 #define ERTS_CPU_BIND_DEFAULT_BIND \
80   ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
81 
82 static int no_cpu_groups_callbacks;
83 static ErtsCpuBindOrder cpu_bind_order;
84 
85 static erts_cpu_topology_t *user_cpudata;
86 static int user_cpudata_size;
87 static erts_cpu_topology_t *system_cpudata;
88 static int system_cpudata_size;
89 
90 typedef struct {
91     int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
92 } erts_avail_cput;
93 
94 typedef struct {
95     int id;
96     int sub_levels;
97     int cpu_groups;
98 } erts_cpu_groups_count_t;
99 
100 typedef struct {
101     int logical;
102     int cpu_group;
103 } erts_cpu_groups_map_array_t;
104 
105 typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t;
106 struct erts_cpu_groups_callback_list_t_ {
107     erts_cpu_groups_callback_list_t *next;
108     erts_cpu_groups_callback_t callback;
109     void *arg;
110 };
111 
112 typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t;
113 struct erts_cpu_groups_map_t_ {
114     erts_cpu_groups_map_t *next;
115     int groups;
116     erts_cpu_groups_map_array_t *array;
117     int size;
118     int logical_processors;
119     erts_cpu_groups_callback_list_t *callback_list;
120 };
121 
122 typedef struct {
123     erts_cpu_groups_callback_t callback;
124     int ix;
125     void *arg;
126 } erts_cpu_groups_callback_call_t;
127 
128 static erts_cpu_groups_map_t *cpu_groups_maps;
129 
130 static erts_cpu_groups_map_t *reader_groups_map;
131 
132 static erts_cpu_groups_map_t *decentralized_counter_groups_map;
133 
134 #define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH
135 
136 #define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
137 
138 static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
139 				int size,
140 				ErtsCpuBindOrder bind_order,
141 				int mk_seq);
142 static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
143 
144 static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
145 static void flxctr_groups_callback(int, ErtsSchedulerData *, int, void *);
146 static erts_cpu_groups_map_t *add_cpu_groups(int groups,
147 					     erts_cpu_groups_callback_t callback,
148 					     void *arg);
149 static void update_cpu_groups_maps(void);
150 static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test);
151 static int cpu_groups_lookup(erts_cpu_groups_map_t *map,
152 			     ErtsSchedulerData *esdp);
153 
154 static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
155 					 int *cpudata_size);
156 static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
157 
158 static int
159 int_cmp(const void *vx, const void *vy)
160 {
161     return *((int *) vx) - *((int *) vy);
162 }
163 
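/*
 * Comparison functions implementing the different bind orders; they are
 * used with qsort() in cpu_bind_order_sort(). The field compared first is
 * the primary sort key, so, roughly, with plain "spread" order additional
 * hardware threads of a core are only used once every core already has a
 * bound scheduler.
 */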
164 static int
165 cpu_spread_order_cmp(const void *vx, const void *vy)
166 {
167     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
168     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
169 
170     if (x->thread != y->thread)
171 	return x->thread - y->thread;
172     if (x->core != y->core)
173 	return x->core - y->core;
174     if (x->processor_node != y->processor_node)
175 	return x->processor_node - y->processor_node;
176     if (x->processor != y->processor)
177 	return x->processor - y->processor;
178     if (x->node != y->node)
179 	return x->node - y->node;
180     return 0;
181 }
182 
183 static int
184 cpu_processor_spread_order_cmp(const void *vx, const void *vy)
185 {
186     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
187     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
188 
189     if (x->thread != y->thread)
190 	return x->thread - y->thread;
191     if (x->processor_node != y->processor_node)
192 	return x->processor_node - y->processor_node;
193     if (x->core != y->core)
194 	return x->core - y->core;
195     if (x->node != y->node)
196 	return x->node - y->node;
197     if (x->processor != y->processor)
198 	return x->processor - y->processor;
199     return 0;
200 }
201 
202 static int
203 cpu_thread_spread_order_cmp(const void *vx, const void *vy)
204 {
205     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
206     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
207 
208     if (x->thread != y->thread)
209 	return x->thread - y->thread;
210     if (x->node != y->node)
211 	return x->node - y->node;
212     if (x->processor != y->processor)
213 	return x->processor - y->processor;
214     if (x->processor_node != y->processor_node)
215 	return x->processor_node - y->processor_node;
216     if (x->core != y->core)
217 	return x->core - y->core;
218     return 0;
219 }
220 
221 static int
222 cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
223 {
224     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
225     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
226 
227     if (x->thread != y->thread)
228 	return x->thread - y->thread;
229     if (x->node != y->node)
230 	return x->node - y->node;
231     if (x->core != y->core)
232 	return x->core - y->core;
233     if (x->processor != y->processor)
234 	return x->processor - y->processor;
235     return 0;
236 }
237 
238 static int
239 cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
240 {
241     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
242     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
243 
244     if (x->node != y->node)
245 	return x->node - y->node;
246     if (x->thread != y->thread)
247 	return x->thread - y->thread;
248     if (x->core != y->core)
249 	return x->core - y->core;
250     if (x->processor != y->processor)
251 	return x->processor - y->processor;
252     return 0;
253 }
254 
255 static int
256 cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
257 {
258     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
259     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
260 
261     if (x->node != y->node)
262 	return x->node - y->node;
263     if (x->thread != y->thread)
264 	return x->thread - y->thread;
265     if (x->processor != y->processor)
266 	return x->processor - y->processor;
267     if (x->core != y->core)
268 	return x->core - y->core;
269     return 0;
270 }
271 
272 static int
273 cpu_no_spread_order_cmp(const void *vx, const void *vy)
274 {
275     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
276     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
277 
278     if (x->node != y->node)
279 	return x->node - y->node;
280     if (x->processor != y->processor)
281 	return x->processor - y->processor;
282     if (x->processor_node != y->processor_node)
283 	return x->processor_node - y->processor_node;
284     if (x->core != y->core)
285 	return x->core - y->core;
286     if (x->thread != y->thread)
287 	return x->thread - y->thread;
288     return 0;
289 }
290 
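/*
 * Renumber the id fields of a topology table into dense sequences
 * (0, 1, 2, ...), resetting lower level counters whenever a higher level
 * id changes. With 'no_node' set, node and processor node ids are folded
 * into a single node sequence. Called (via mk_seq) before sorting for a
 * bind order so that ids are comparable across the whole table.
 */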
291 static ERTS_INLINE void
292 make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
293 {
294     int ix;
295     int node = -1;
296     int processor = -1;
297     int processor_node = -1;
298     int processor_node_node = -1;
299     int core = -1;
300     int thread = -1;
301     int old_node = -1;
302     int old_processor = -1;
303     int old_processor_node = -1;
304     int old_core = -1;
305     int old_thread = -1;
306 
307     for (ix = 0; ix < size; ix++) {
308 	if (!no_node || cpudata[ix].node >= 0) {
309 	    if (old_node == cpudata[ix].node)
310 		cpudata[ix].node = node;
311 	    else {
312 		old_node = cpudata[ix].node;
313 		old_processor = processor = -1;
314 		if (!no_node)
315 		    old_processor_node = processor_node = -1;
316 		old_core = core = -1;
317 		old_thread = thread = -1;
318 		if (no_node || cpudata[ix].node >= 0)
319 		    cpudata[ix].node = ++node;
320 	    }
321 	}
322 	if (old_processor == cpudata[ix].processor)
323 	    cpudata[ix].processor = processor;
324 	else {
325 	    old_processor = cpudata[ix].processor;
326 	    if (!no_node)
327 		processor_node_node = old_processor_node = processor_node = -1;
328 	    old_core = core = -1;
329 	    old_thread = thread = -1;
330 	    cpudata[ix].processor = ++processor;
331 	}
332 	if (no_node && cpudata[ix].processor_node < 0)
333 	    old_processor_node = -1;
334 	else {
335 	    if (old_processor_node == cpudata[ix].processor_node) {
336 		if (no_node)
337 		    cpudata[ix].node = cpudata[ix].processor_node = node;
338 		else {
339 		    if (processor_node_node >= 0)
340 			cpudata[ix].node = processor_node_node;
341 		    cpudata[ix].processor_node = processor_node;
342 		}
343 	    }
344 	    else {
345 		old_processor_node = cpudata[ix].processor_node;
346 		old_core = core = -1;
347 		old_thread = thread = -1;
348 		if (no_node)
349 		    cpudata[ix].node = cpudata[ix].processor_node = ++node;
350 		else {
351 		    cpudata[ix].node = processor_node_node = ++node;
352 		    cpudata[ix].processor_node = ++processor_node;
353 		}
354 	    }
355 	}
356 	if (!no_node && cpudata[ix].processor_node < 0)
357 	    cpudata[ix].processor_node = 0;
358 	if (old_core == cpudata[ix].core)
359 	    cpudata[ix].core = core;
360 	else {
361 	    old_core = cpudata[ix].core;
362 	    old_thread = thread = -1;
363 	    cpudata[ix].core = ++core;
364 	}
365 	if (old_thread == cpudata[ix].thread)
366 	    cpudata[ix].thread = thread;
367 	else
368 	    old_thread = cpudata[ix].thread = ++thread;
369     }
370 }
371 
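/*
 * Sort the topology table according to the given bind order; when 'mk_seq'
 * is set the ids are first normalized by make_cpudata_id_seq() above.
 */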
372 static void
373 cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
374 		    int size,
375 		    ErtsCpuBindOrder bind_order,
376 		    int mk_seq)
377 {
378     if (size > 1) {
379 	int no_node = 0;
380 	int (*cmp_func)(const void *, const void *);
381 	switch (bind_order) {
382 	case ERTS_CPU_BIND_SPREAD:
383 	    cmp_func = cpu_spread_order_cmp;
384 	    break;
385 	case ERTS_CPU_BIND_PROCESSOR_SPREAD:
386 	    cmp_func = cpu_processor_spread_order_cmp;
387 	    break;
388 	case ERTS_CPU_BIND_THREAD_SPREAD:
389 	    cmp_func = cpu_thread_spread_order_cmp;
390 	    break;
391 	case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
392 	    no_node = 1;
393 	    cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
394 	    break;
395 	case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
396 	    no_node = 1;
397 	    cmp_func = cpu_no_node_processor_spread_order_cmp;
398 	    break;
399 	case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
400 	    no_node = 1;
401 	    cmp_func = cpu_no_node_thread_spread_order_cmp;
402 	    break;
403 	case ERTS_CPU_BIND_NO_SPREAD:
404 	    cmp_func = cpu_no_spread_order_cmp;
405 	    break;
406 	default:
407 	    cmp_func = NULL;
408 	    erts_exit(ERTS_ABORT_EXIT,
409 		     "Bad cpu bind type: %d\n",
410 		     (int) cpu_bind_order);
411 	    break;
412 	}
413 
414 	if (mk_seq)
415 	    make_cpudata_id_seq(cpudata, size, no_node);
416 
417 	qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
418     }
419 }
420 
421 static int
422 processor_order_cmp(const void *vx, const void *vy)
423 {
424     erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
425     erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
426 
427     if (x->processor != y->processor)
428 	return x->processor - y->processor;
429     if (x->node != y->node)
430 	return x->node - y->node;
431     if (x->processor_node != y->processor_node)
432 	return x->processor_node - y->processor_node;
433     if (x->core != y->core)
434 	return x->core - y->core;
435     if (x->thread != y->thread)
436 	return x->thread - y->thread;
437     return 0;
438 }
439 
440 void
441 erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp)
442 {
443     erts_cpu_groups_map_t *cgm;
444     erts_cpu_groups_callback_list_t *cgcl;
445     erts_cpu_groups_callback_call_t *cgcc;
446     int cgcc_ix;
447 
448     /* Unbind from cpu */
449     erts_rwmtx_rwlock(&cpuinfo_rwmtx);
450     if (scheduler2cpu_map[esdp->no].bound_id >= 0
451 	&& erts_unbind_from_cpu(cpuinfo) == 0) {
452 	esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
453     }
454 
455     cgcc = erts_alloc(ERTS_ALC_T_TMP,
456 		      (no_cpu_groups_callbacks
457 		       * sizeof(erts_cpu_groups_callback_call_t)));
458     cgcc_ix = 0;
459     for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
460 	for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
461 	    cgcc[cgcc_ix].callback = cgcl->callback;
462 	    cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
463 	    cgcc[cgcc_ix].arg = cgcl->arg;
464 	    cgcc_ix++;
465 	}
466     }
467     ASSERT(no_cpu_groups_callbacks == cgcc_ix);
468     erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
469 
470     for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
471 	cgcc[cgcc_ix].callback(1,
472 			       esdp,
473 			       cgcc[cgcc_ix].ix,
474 			       cgcc[cgcc_ix].arg);
475 
476     erts_free(ERTS_ALC_T_TMP, cgcc);
477 
478     if (esdp->no <= max_main_threads)
479 	erts_thr_set_main_status(0, 0);
480 
481 }
482 
483 void
484 erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp)
485 {
486     ERTS_LC_ASSERT(erts_lc_runq_is_locked(esdp->run_queue));
487 
488     if (esdp->no <= max_main_threads)
489 	erts_thr_set_main_status(1, (int) esdp->no);
490 
491     /* Make sure we check if we should bind to a cpu or not... */
492     (void) ERTS_RUNQ_FLGS_SET(esdp->run_queue, ERTS_RUNQ_FLG_CHK_CPU_BIND);
493 }
494 
495 
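/*
 * Called by a scheduler thread after ERTS_RUNQ_FLG_CHK_CPU_BIND has been
 * set on its run queue: (re)bind or unbind the calling scheduler according
 * to scheduler2cpu_map and inform all registered cpu groups callbacks of
 * the scheduler's current group. The run queue lock is temporarily
 * released while doing this.
 */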
496 void
497 erts_sched_check_cpu_bind(ErtsSchedulerData *esdp)
498 {
499     int res, cpu_id, cgcc_ix;
500     erts_cpu_groups_map_t *cgm;
501     erts_cpu_groups_callback_list_t *cgcl;
502     erts_cpu_groups_callback_call_t *cgcc;
503     erts_runq_unlock(esdp->run_queue);
504     erts_rwmtx_rwlock(&cpuinfo_rwmtx);
505     cpu_id = scheduler2cpu_map[esdp->no].bind_id;
506     if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
507 	res = erts_bind_to_cpu(cpuinfo, cpu_id);
508 	if (res == 0)
509 	    esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
510 	else {
511 	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
512 	    erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
513 			  (int) esdp->no, cpu_id, erl_errno_id(-res));
514 	    erts_send_error_to_logger_nogl(dsbufp);
515 	    if (scheduler2cpu_map[esdp->no].bound_id >= 0)
516 		goto unbind;
517 	}
518     }
519     else if (cpu_id < 0) {
520     unbind:
521 	/* Get rid of old binding */
522 	res = erts_unbind_from_cpu(cpuinfo);
523 	if (res == 0)
524 	    esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
525 	else if (res != -ENOTSUP) {
526 	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
527 	    erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
528 			  (int) esdp->no, cpu_id, erl_errno_id(-res));
529 	    erts_send_error_to_logger_nogl(dsbufp);
530 	}
531     }
532 
533     cgcc = erts_alloc(ERTS_ALC_T_TMP,
534 		      (no_cpu_groups_callbacks
535 		       * sizeof(erts_cpu_groups_callback_call_t)));
536     cgcc_ix = 0;
537     for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
538 	for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
539 	    cgcc[cgcc_ix].callback = cgcl->callback;
540 	    cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
541 	    cgcc[cgcc_ix].arg = cgcl->arg;
542 	    cgcc_ix++;
543 	}
544     }
545 
546     ASSERT(no_cpu_groups_callbacks == cgcc_ix);
547     erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
548 
549     for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
550 	cgcc[cgcc_ix].callback(0,
551 			       esdp,
552 			       cgcc[cgcc_ix].ix,
553 			       cgcc[cgcc_ix].arg);
554 
555     erts_free(ERTS_ALC_T_TMP, cgcc);
556 
557     erts_runq_lock(esdp->run_queue);
558 }
559 
560 void
561 erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp)
562 {
563     int cgcc_ix;
564     erts_cpu_groups_map_t *cgm;
565     erts_cpu_groups_callback_list_t *cgcl;
566     erts_cpu_groups_callback_call_t *cgcc;
567 
568     erts_rwmtx_rlock(&cpuinfo_rwmtx);
569 
570     cgcc = erts_alloc(ERTS_ALC_T_TMP,
571 		      (no_cpu_groups_callbacks
572 		       * sizeof(erts_cpu_groups_callback_call_t)));
573     cgcc_ix = 0;
574     for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
575 	for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
576 	    cgcc[cgcc_ix].callback = cgcl->callback;
577 	    cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
578 	    cgcc[cgcc_ix].arg = cgcl->arg;
579 	    cgcc_ix++;
580 	}
581     }
582 
583     ASSERT(no_cpu_groups_callbacks == cgcc_ix);
584     erts_rwmtx_runlock(&cpuinfo_rwmtx);
585 
586     for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
587 	cgcc[cgcc_ix].callback(0,
588 			       esdp,
589 			       cgcc[cgcc_ix].ix,
590 			       cgcc[cgcc_ix].arg);
591 
592     erts_free(ERTS_ALC_T_TMP, cgcc);
593 
594     if (esdp->no <= max_main_threads)
595 	erts_thr_set_main_status(1, (int) esdp->no);
596 }
597 
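/*
 * Recompute the wanted binding (bind_id) for every scheduler from the given
 * topology and the current cpu_bind_order. Schedulers that cannot be given
 * an available logical cpu are marked unbound (-1). The caller is expected
 * to notify the schedulers afterwards.
 */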
598 static void
599 write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
600 {
601     int s_ix = 1;
602     int cpu_ix;
603 
604     ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
605 
606     if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
607 
608 	cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
609 
610 	for (cpu_ix = 0; cpu_ix < size && s_ix <= erts_no_schedulers; cpu_ix++)
611 	    if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical))
612 		scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
613     }
614 
615     if (s_ix <= erts_no_schedulers)
616 	for (; s_ix <= erts_no_schedulers; s_ix++)
617 	    scheduler2cpu_map[s_ix].bind_id = -1;
618 }
619 
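/*
 * Parse the scheduler bind type given as a string at system start
 * (presumably via the +sbt emulator flag): "u" = unbound, "db" = default
 * bind, and the various spread orders ("s", "ps", "ts", "tnnps", "nnps",
 * "nnts", "ns").
 */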
620 int
621 erts_init_scheduler_bind_type_string(char *how)
622 {
623     ErtsCpuBindOrder order;
624 
625     if (sys_strcmp(how, "u") == 0)
626 	order = ERTS_CPU_BIND_NONE;
627     else if (sys_strcmp(how, "db") == 0)
628 	order = ERTS_CPU_BIND_DEFAULT_BIND;
629     else if (sys_strcmp(how, "s") == 0)
630 	order = ERTS_CPU_BIND_SPREAD;
631     else if (sys_strcmp(how, "ps") == 0)
632 	order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
633     else if (sys_strcmp(how, "ts") == 0)
634 	order = ERTS_CPU_BIND_THREAD_SPREAD;
635     else if (sys_strcmp(how, "tnnps") == 0)
636 	order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
637     else if (sys_strcmp(how, "nnps") == 0)
638 	order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
639     else if (sys_strcmp(how, "nnts") == 0)
640 	order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
641     else if (sys_strcmp(how, "ns") == 0)
642 	order = ERTS_CPU_BIND_NO_SPREAD;
643     else
644 	return ERTS_INIT_SCHED_BIND_TYPE_ERROR_BAD_TYPE;
645 
646     if (order != ERTS_CPU_BIND_NONE) {
647 	if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP)
648 	    return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
649 	else if (!system_cpudata && !user_cpudata)
650 	    return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
651     }
652 
653     cpu_bind_order = order;
654 
655     return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
656 }
657 
658 static Eterm
659 bound_schedulers_term(ErtsCpuBindOrder order)
660 {
661     switch (order) {
662     case ERTS_CPU_BIND_SPREAD: {
663 	ERTS_DECL_AM(spread);
664 	return AM_spread;
665     }
666     case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
667 	ERTS_DECL_AM(processor_spread);
668 	return AM_processor_spread;
669     }
670     case ERTS_CPU_BIND_THREAD_SPREAD: {
671 	ERTS_DECL_AM(thread_spread);
672 	return AM_thread_spread;
673     }
674     case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
675 	ERTS_DECL_AM(thread_no_node_processor_spread);
676 	return AM_thread_no_node_processor_spread;
677     }
678     case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
679 	ERTS_DECL_AM(no_node_processor_spread);
680 	return AM_no_node_processor_spread;
681     }
682     case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
683 	ERTS_DECL_AM(no_node_thread_spread);
684 	return AM_no_node_thread_spread;
685     }
686     case ERTS_CPU_BIND_NO_SPREAD: {
687 	ERTS_DECL_AM(no_spread);
688 	return AM_no_spread;
689     }
690     case ERTS_CPU_BIND_NONE: {
691 	ERTS_DECL_AM(unbound);
692 	return AM_unbound;
693     }
694     default:
695 	ASSERT(0);
696 	return THE_NON_VALUE;
697     }
698 }
699 
700 Eterm
701 erts_bound_schedulers_term(Process *c_p)
702 {
703     ErtsCpuBindOrder order;
704     erts_rwmtx_rlock(&cpuinfo_rwmtx);
705     order = cpu_bind_order;
706     erts_rwmtx_runlock(&cpuinfo_rwmtx);
707     return bound_schedulers_term(order);
708 }
709 
710 Eterm
711 erts_bind_schedulers(Process *c_p, Eterm how)
712 {
713     int notify = 0;
714     Eterm res;
715     erts_cpu_topology_t *cpudata;
716     int cpudata_size;
717     ErtsCpuBindOrder old_cpu_bind_order;
718 
719     erts_rwmtx_rwlock(&cpuinfo_rwmtx);
720 
721     if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) {
722 	if (cpu_bind_order == ERTS_CPU_BIND_NONE
723 	    && ERTS_IS_ATOM_STR("unbound", how)) {
724 	    res = bound_schedulers_term(ERTS_CPU_BIND_NONE);
725 	    goto done;
726 	}
727 	ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
728     }
729     else {
730 
731 	old_cpu_bind_order = cpu_bind_order;
732 
733 	if (ERTS_IS_ATOM_STR("default_bind", how))
734 	    cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
735 	else if (ERTS_IS_ATOM_STR("spread", how))
736 	    cpu_bind_order = ERTS_CPU_BIND_SPREAD;
737 	else if (ERTS_IS_ATOM_STR("processor_spread", how))
738 	    cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
739 	else if (ERTS_IS_ATOM_STR("thread_spread", how))
740 	    cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
741 	else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
742 	    cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
743 	else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
744 	    cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
745 	else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
746 	    cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
747 	else if (ERTS_IS_ATOM_STR("no_spread", how))
748 	    cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
749 	else if (ERTS_IS_ATOM_STR("unbound", how))
750 	    cpu_bind_order = ERTS_CPU_BIND_NONE;
751 	else {
752 	    cpu_bind_order = old_cpu_bind_order;
753 	    ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
754 	    goto done;
755 	}
756 
757 	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
758 
759 	if (!cpudata) {
760 	    cpu_bind_order = old_cpu_bind_order;
761 	    ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
762 	    goto done;
763 	}
764 
765 	write_schedulers_bind_change(cpudata, cpudata_size);
766 	notify = 1;
767 
768 	destroy_tmp_cpu_topology_copy(cpudata);
769 
770 	res = bound_schedulers_term(old_cpu_bind_order);
771     }
772 
773  done:
774 
775     erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
776 
777     if (notify)
778 	erts_sched_notify_check_cpu_bind();
779 
780     return res;
781 }
782 
783 int
784 erts_sched_bind_atthrcreate_prepare(void)
785 {
786     ErtsSchedulerData *esdp = erts_get_scheduler_data();
787     return esdp != NULL && erts_is_scheduler_bound(esdp);
788 }
789 
790 int
791 erts_sched_bind_atthrcreate_child(int unbind)
792 {
793     int res = 0;
794     if (unbind) {
795 	erts_rwmtx_rlock(&cpuinfo_rwmtx);
796 	res = erts_unbind_from_cpu(cpuinfo);
797 	erts_rwmtx_runlock(&cpuinfo_rwmtx);
798     }
799     return res;
800 }
801 
802 void
803 erts_sched_bind_atthrcreate_parent(int unbind)
804 {
805 
806 }
807 
808 int
809 erts_sched_bind_atfork_prepare(void)
810 {
811     ErtsSchedulerData *esdp = erts_get_scheduler_data();
812     int unbind = esdp != NULL && erts_is_scheduler_bound(esdp);
813     if (unbind)
814 	erts_rwmtx_rlock(&cpuinfo_rwmtx);
815     return unbind;
816 }
817 
818 int
819 erts_sched_bind_atfork_child(int unbind)
820 {
821     if (unbind) {
822 	ERTS_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
823 			   || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
824 	return erts_unbind_from_cpu(cpuinfo);
825     }
826     return 0;
827 }
828 
829 void
830 erts_sched_bind_atfork_parent(int unbind)
831 {
832     if (unbind)
833 	erts_rwmtx_runlock(&cpuinfo_rwmtx);
834 }
835 
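/*
 * Compute the bindings that the given bind type would produce, without
 * changing any actual bindings. Returns a tuple of logical cpu ids in bind
 * order, or 'false' when there is no cpu topology or the type is 'unbound'.
 */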
836 Eterm
837 erts_fake_scheduler_bindings(Process *p, Eterm how)
838 {
839     ErtsCpuBindOrder fake_cpu_bind_order;
840     erts_cpu_topology_t *cpudata;
841     int cpudata_size;
842     Eterm res;
843 
844     if (ERTS_IS_ATOM_STR("default_bind", how))
845 	fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
846     else if (ERTS_IS_ATOM_STR("spread", how))
847 	fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
848     else if (ERTS_IS_ATOM_STR("processor_spread", how))
849 	fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
850     else if (ERTS_IS_ATOM_STR("thread_spread", how))
851 	fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
852     else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
853 	fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
854     else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
855 	fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
856     else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
857 	fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
858     else if (ERTS_IS_ATOM_STR("no_spread", how))
859 	fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
860     else if (ERTS_IS_ATOM_STR("unbound", how))
861 	fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
862     else {
863 	ERTS_BIF_PREP_ERROR(res, p, BADARG);
864 	return res;
865     }
866 
867     erts_rwmtx_rlock(&cpuinfo_rwmtx);
868     create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
869     erts_rwmtx_runlock(&cpuinfo_rwmtx);
870 
871     if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
872 	ERTS_BIF_PREP_RET(res, am_false);
873     else {
874 	int i;
875 	Eterm *hp;
876 
877 	cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
878 
879 #ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
880 
881 	erts_fprintf(stderr, "node:          ");
882 	for (i = 0; i < cpudata_size; i++)
883 	    erts_fprintf(stderr, " %2d", cpudata[i].node);
884 	erts_fprintf(stderr, "\n");
885 	erts_fprintf(stderr, "processor:     ");
886 	for (i = 0; i < cpudata_size; i++)
887 	    erts_fprintf(stderr, " %2d", cpudata[i].processor);
888 	erts_fprintf(stderr, "\n");
889 	if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
890 	    && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
891 	    && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
892 	    erts_fprintf(stderr, "processor_node:");
893 	    for (i = 0; i < cpudata_size; i++)
894 		erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
895 	    erts_fprintf(stderr, "\n");
896 	}
897 	erts_fprintf(stderr, "core:          ");
898 	for (i = 0; i < cpudata_size; i++)
899 	    erts_fprintf(stderr, " %2d", cpudata[i].core);
900 	erts_fprintf(stderr, "\n");
901 	erts_fprintf(stderr, "thread:        ");
902 	for (i = 0; i < cpudata_size; i++)
903 	    erts_fprintf(stderr, " %2d", cpudata[i].thread);
904 	erts_fprintf(stderr, "\n");
905 	erts_fprintf(stderr, "logical:       ");
906 	for (i = 0; i < cpudata_size; i++)
907 	    erts_fprintf(stderr, " %2d", cpudata[i].logical);
908 	erts_fprintf(stderr, "\n");
909 #endif
910 
911 	hp = HAlloc(p, cpudata_size+1);
912 	ERTS_BIF_PREP_RET(res, make_tuple(hp));
913 	*hp++ = make_arityval((Uint) cpudata_size);
914 	for (i = 0; i < cpudata_size; i++)
915 	    *hp++ = make_small((Uint) cpudata[i].logical);
916     }
917 
918     destroy_tmp_cpu_topology_copy(cpudata);
919 
920     return res;
921 }
922 
923 Eterm
924 erts_get_schedulers_binds(Process *c_p)
925 {
926     int ix;
927     ERTS_DECL_AM(unbound);
928     Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
929     Eterm res = make_tuple(hp);
930 
931     *(hp++) = make_arityval(erts_no_schedulers);
932     erts_rwmtx_rlock(&cpuinfo_rwmtx);
933     for (ix = 1; ix <= erts_no_schedulers; ix++)
934 	*(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
935 		   ? make_small(scheduler2cpu_map[ix].bound_id)
936 		   : AM_unbound);
937     erts_rwmtx_runlock(&cpuinfo_rwmtx);
938     return res;
939 }
940 
941 /*
942  * CPU topology
943  */
944 
945 typedef struct {
946     int *id;
947     int used;
948     int size;
949 } ErtsCpuTopIdSeq;
950 
951 typedef struct {
952     ErtsCpuTopIdSeq logical;
953     ErtsCpuTopIdSeq thread;
954     ErtsCpuTopIdSeq core;
955     ErtsCpuTopIdSeq processor_node;
956     ErtsCpuTopIdSeq processor;
957     ErtsCpuTopIdSeq node;
958 } ErtsCpuTopEntry;
959 
960 static void
961 init_cpu_top_entry(ErtsCpuTopEntry *cte)
962 {
963     int size = 10;
964     cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
965 				 sizeof(int)*size);
966     cte->logical.size = size;
967     cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
968 				sizeof(int)*size);
969     cte->thread.size = size;
970     cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
971 			      sizeof(int)*size);
972     cte->core.size = size;
973     cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
974 					sizeof(int)*size);
975     cte->processor_node.size = size;
976     cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
977 				   sizeof(int)*size);
978     cte->processor.size = size;
979     cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
980 			      sizeof(int)*size);
981     cte->node.size = size;
982 }
983 
984 static void
985 destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
986 {
987     erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
988     erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
989     erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
990     erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
991     erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
992     erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
993 }
994 
995 static int
996 get_cput_value_or_range(int *v, int *vr, char **str)
997 {
998     long l;
999     char *c = *str;
1000     errno = 0;
1001     if (!isdigit((unsigned char)*c))
1002 	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
1003     l = strtol(c, &c, 10);
1004     if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
1005 	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
1006     *v = (int) l;
1007     if (*c == '-') {
1008 	c++;
1009 	if (!isdigit((unsigned char)*c))
1010 	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1011 	l = strtol(c, &c, 10);
1012 	if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
1013 	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1014 	*vr = (int) l;
1015     }
1016     *str = c;
1017     return ERTS_INIT_CPU_TOPOLOGY_OK;
1018 }
1019 
1020 static int
1021 get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
1022 {
1023     int ix = 0;
1024     int need_size = 0;
1025     char *c = *str;
1026 
1027     while (1) {
1028 	int res;
1029 	int val;
1030 	int nids;
1031 	int val_range = -1;
1032 	res = get_cput_value_or_range(&val, &val_range, &c);
1033 	if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
1034 	    return res;
1035 	if (val_range < 0 || val_range == val)
1036 	    nids = 1;
1037 	else {
1038 	    if (val_range > val)
1039 		nids = val_range - val + 1;
1040 	    else
1041 		nids = val - val_range + 1;
1042 	}
1043 	need_size += nids;
1044 	if (need_size > idseq->size) {
1045 	    idseq->size = need_size + 10;
1046 	    idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
1047 				      idseq->id,
1048 				      sizeof(int)*idseq->size);
1049 	}
1050 	if (nids == 1)
1051 	    idseq->id[ix++] = val;
1052 	else if (val_range > val) {
1053 	    for (; val <= val_range; val++)
1054 		idseq->id[ix++] = val;
1055 	}
1056 	else {
1057 	    for (; val >= val_range; val--)
1058 		idseq->id[ix++] = val;
1059 	}
1060 	if (*c != ',')
1061 	    break;
1062 	c++;
1063     }
1064     *str = c;
1065     idseq->used = ix;
1066     return ERTS_INIT_CPU_TOPOLOGY_OK;
1067 }
1068 
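/*
 * Parse one topology entry: a sequence of typed id lists ('L' logical,
 * 't'/'T' thread, 'c'/'C' core, 'p'/'P' processor, 'n'/'N' node, where an
 * 'n' that is followed by a 'p' in the same entry denotes a processor
 * node), terminated by ':' or end of string. Logical ids are mandatory;
 * every other id list must contain either a single id or as many ids as
 * the logical id list.
 */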
1069 static int
1070 get_cput_entry(ErtsCpuTopEntry *cput, char **str)
1071 {
1072     int h;
1073     char *c = *str;
1074 
1075     cput->logical.used = 0;
1076     cput->thread.id[0] = 0;
1077     cput->thread.used = 1;
1078     cput->core.id[0] = 0;
1079     cput->core.used = 1;
1080     cput->processor_node.id[0] = -1;
1081     cput->processor_node.used = 1;
1082     cput->processor.id[0] = 0;
1083     cput->processor.used = 1;
1084     cput->node.id[0] = -1;
1085     cput->node.used = 1;
1086 
1087     h = ERTS_TOPOLOGY_MAX_DEPTH;
1088     while (*c != ':' && *c != '\0') {
1089 	int res;
1090 	ErtsCpuTopIdSeq *idseqp;
1091 	switch (*c++) {
1092 	case 'L':
1093 	    if (h <= ERTS_TOPOLOGY_LOGICAL)
1094 		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
1095 	    idseqp = &cput->logical;
1096 	    h = ERTS_TOPOLOGY_LOGICAL;
1097 	    break;
1098 	case 't':
1099 	case 'T':
1100 	    if (h <= ERTS_TOPOLOGY_THREAD)
1101 		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
1102 	    idseqp = &cput->thread;
1103 	    h = ERTS_TOPOLOGY_THREAD;
1104 	    break;
1105 	case 'c':
1106 	case 'C':
1107 	    if (h <= ERTS_TOPOLOGY_CORE)
1108 		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
1109 	    idseqp = &cput->core;
1110 	    h = ERTS_TOPOLOGY_CORE;
1111 	    break;
1112 	case 'p':
1113 	case 'P':
1114 	    if (h <= ERTS_TOPOLOGY_PROCESSOR)
1115 		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
1116 	    idseqp = &cput->processor;
1117 	    h = ERTS_TOPOLOGY_PROCESSOR;
1118 	    break;
1119 	case 'n':
1120 	case 'N':
1121 	    if (h <= ERTS_TOPOLOGY_PROCESSOR) {
1122 	    do_node:
1123 		if (h <= ERTS_TOPOLOGY_NODE)
1124 		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
1125 		idseqp = &cput->node;
1126 		h = ERTS_TOPOLOGY_NODE;
1127 	    }
1128 	    else {
1129 		int p_node = 0;
1130 		char *p_chk = c;
1131 		while (*p_chk != '\0' && *p_chk != ':') {
1132 		    if (*p_chk == 'p' || *p_chk == 'P') {
1133 			p_node = 1;
1134 			break;
1135 		    }
1136 		    p_chk++;
1137 		}
1138 		if (!p_node)
1139 		    goto do_node;
1140 		if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
1141 		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
1142 		idseqp = &cput->processor_node;
1143 		h = ERTS_TOPOLOGY_PROCESSOR_NODE;
1144 	    }
1145 	    break;
1146 	default:
1147 	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
1148 	}
1149 	res = get_cput_id_seq(idseqp, &c);
1150 	if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
1151 		return res;
1152     }
1153 
1154     if (cput->logical.used < 1)
1155 	return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
1156 
1157     if (*c == ':') {
1158 	c++;
1159     }
1160 
1161     if (cput->thread.used != 1
1162 	&& cput->thread.used != cput->logical.used)
1163 	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1164     if (cput->core.used != 1
1165 	&& cput->core.used != cput->logical.used)
1166 	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1167     if (cput->processor_node.used != 1
1168 	&& cput->processor_node.used != cput->logical.used)
1169 	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1170     if (cput->processor.used != 1
1171 	&& cput->processor.used != cput->logical.used)
1172 	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1173     if (cput->node.used != 1
1174 	&& cput->node.used != cput->logical.used)
1175 	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1176 
1177     *str = c;
1178     return ERTS_INIT_CPU_TOPOLOGY_OK;
1179 }
1180 
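/*
 * Sanity check a topology table: logical ids must be unique, no two entries
 * may describe the same <node, processor, processor node, core, thread>
 * entity, and NUMA information must be consistent (either absent in all
 * entries, or exactly one of node/processor node given per entry). Note
 * that the table is sorted in processor order as a side effect.
 */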
1181 static int
1182 verify_topology(erts_cpu_topology_t *cpudata, int size)
1183 {
1184     if (size > 0) {
1185 	int *logical;
1186 	int node, processor, no_nodes, i;
1187 
1188 	/* Verify logical ids */
1189 	logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
1190 
1191 	for (i = 0; i < size; i++)
1192 	    logical[i] = cpudata[i].logical;
1193 
1194 	qsort(logical, size, sizeof(int), int_cmp);
1195 	for (i = 0; i < size-1; i++) {
1196 	    if (logical[i] == logical[i+1]) {
1197 		erts_free(ERTS_ALC_T_TMP, logical);
1198 		return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
1199 	    }
1200 	}
1201 
1202 	erts_free(ERTS_ALC_T_TMP, logical);
1203 
1204 	qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
1205 
1206 	/* Verify unique entities */
1207 
1208 	for (i = 1; i < size; i++) {
1209 	    if (cpudata[i-1].processor == cpudata[i].processor
1210 		&& cpudata[i-1].node == cpudata[i].node
1211 		&& (cpudata[i-1].processor_node
1212 		    == cpudata[i].processor_node)
1213 		&& cpudata[i-1].core == cpudata[i].core
1214 		&& cpudata[i-1].thread == cpudata[i].thread) {
1215 		return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
1216 	    }
1217 	}
1218 
1219 	/* Verify numa nodes */
1220 	node = cpudata[0].node;
1221 	processor = cpudata[0].processor;
1222 	no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
1223 	for (i = 1; i < size; i++) {
1224 	    if (no_nodes) {
1225 		if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
1226 		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
1227 	    }
1228 	    else {
1229 		if (cpudata[i].processor == processor && cpudata[i].node != node)
1230 		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
1231 		node = cpudata[i].node;
1232 		processor = cpudata[i].processor;
1233 		if (node >= 0 && cpudata[i].processor_node >= 0)
1234 		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
1235 		if (node < 0 && cpudata[i].processor_node < 0)
1236 		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
1237 	    }
1238 	}
1239     }
1240 
1241     return ERTS_INIT_CPU_TOPOLOGY_OK;
1242 }
1243 
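/*
 * Parse a user defined cpu topology given as a string at system start
 * (presumably via the +sct emulator flag). The string is a ':'-separated
 * sequence of entries as described above for get_cput_entry(); for
 * example, a string like "L0-3c0-3p0N0:L4-7c0-3p1N1" would describe two
 * processors with four cores each, placed on two separate NUMA nodes.
 * The result replaces user_cpudata if it passes verify_topology().
 */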
1244 int
1245 erts_init_cpu_topology_string(char *topology_str)
1246 {
1247     ErtsCpuTopEntry cput;
1248     int need_size;
1249     char *c;
1250     int ix;
1251     int error = ERTS_INIT_CPU_TOPOLOGY_OK;
1252 
1253     if (user_cpudata)
1254 	erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
1255     user_cpudata_size = 10;
1256 
1257     user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
1258 			      (sizeof(erts_cpu_topology_t)
1259 			       * user_cpudata_size));
1260 
1261     init_cpu_top_entry(&cput);
1262 
1263     ix = 0;
1264     need_size = 0;
1265 
1266     c = topology_str;
1267     if (*c == '\0') {
1268 	error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
1269 	goto fail;
1270     }
1271     do {
1272 	int r;
1273 	error = get_cput_entry(&cput, &c);
1274 	if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
1275 	    goto fail;
1276 	need_size += cput.logical.used;
1277 	if (user_cpudata_size < need_size) {
1278 	    user_cpudata_size = need_size + 10;
1279 	    user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
1280 					user_cpudata,
1281 					(sizeof(erts_cpu_topology_t)
1282 					 * user_cpudata_size));
1283 	}
1284 
1285 	ASSERT(cput.thread.used == 1
1286 	       || cput.thread.used == cput.logical.used);
1287 	ASSERT(cput.core.used == 1
1288 	       || cput.core.used == cput.logical.used);
1289 	ASSERT(cput.processor_node.used == 1
1290 	       || cput.processor_node.used == cput.logical.used);
1291 	ASSERT(cput.processor.used == 1
1292 	       || cput.processor.used == cput.logical.used);
1293 	ASSERT(cput.node.used == 1
1294 	       || cput.node.used == cput.logical.used);
1295 
1296 	for (r = 0; r < cput.logical.used; r++) {
1297 	    user_cpudata[ix].logical = cput.logical.id[r];
1298 	    user_cpudata[ix].thread =
1299 		cput.thread.id[cput.thread.used == 1 ? 0 : r];
1300 	    user_cpudata[ix].core =
1301 		cput.core.id[cput.core.used == 1 ? 0 : r];
1302 	    user_cpudata[ix].processor_node =
1303 		cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
1304 	    user_cpudata[ix].processor =
1305 		cput.processor.id[cput.processor.used == 1 ? 0 : r];
1306 	    user_cpudata[ix].node =
1307 		cput.node.id[cput.node.used == 1 ? 0 : r];
1308 	    ix++;
1309 	}
1310     } while (*c != '\0');
1311 
1312     if (user_cpudata_size != ix) {
1313 	user_cpudata_size = ix;
1314 	user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
1315 				    user_cpudata,
1316 				    (sizeof(erts_cpu_topology_t)
1317 				     * user_cpudata_size));
1318     }
1319 
1320     error = verify_topology(user_cpudata, user_cpudata_size);
1321     if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
1322 	destroy_cpu_top_entry(&cput);
1323 	return ERTS_INIT_CPU_TOPOLOGY_OK;
1324     }
1325 
1326  fail:
1327     if (user_cpudata)
1328 	erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
1329     user_cpudata_size = 0;
1330     destroy_cpu_top_entry(&cput);
1331     return error;
1332 }
1333 
1334 #define ERTS_GET_CPU_TOPOLOGY_ERROR		-1
1335 #define ERTS_GET_USED_CPU_TOPOLOGY		0
1336 #define ERTS_GET_DETECTED_CPU_TOPOLOGY		1
1337 #define ERTS_GET_DEFINED_CPU_TOPOLOGY		2
1338 
1339 static Eterm get_cpu_topology_term(Process *c_p, int type);
1340 
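/*
 * Set (or, when 'term' is 'undefined', clear) the user defined cpu topology
 * from an Erlang term: a list of {cpu, Node, Processor, ProcessorNode,
 * Core, Thread, Logical} tuples, e.g. (illustrative only)
 * [{cpu,0,0,-1,0,0,0}, {cpu,0,0,-1,1,0,1}]. Returns the previously used
 * topology term, or THE_NON_VALUE (badarg) if the new topology does not
 * verify. Scheduler bindings are recomputed on success.
 */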
1341 Eterm
1342 erts_set_cpu_topology(Process *c_p, Eterm term)
1343 {
1344     erts_cpu_topology_t *cpudata = NULL;
1345     int cpudata_size = 0;
1346     Eterm res;
1347 
1348     erts_rwmtx_rwlock(&cpuinfo_rwmtx);
1349     res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
1350     if (term == am_undefined) {
1351 	if (user_cpudata)
1352 	    erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
1353 	user_cpudata = NULL;
1354 	user_cpudata_size = 0;
1355 
1356 	if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
1357 	    cpudata_size = system_cpudata_size;
1358 	    cpudata = erts_alloc(ERTS_ALC_T_TMP,
1359 				 (sizeof(erts_cpu_topology_t)
1360 				  * cpudata_size));
1361 
1362 	    sys_memcpy((void *) cpudata,
1363 		       (void *) system_cpudata,
1364 		       sizeof(erts_cpu_topology_t)*cpudata_size);
1365 	}
1366     }
1367     else if (is_not_list(term)) {
1368     error:
1369 	erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
1370 	res = THE_NON_VALUE;
1371 	goto done;
1372     }
1373     else {
1374 	Eterm list = term;
1375 	int ix = 0;
1376 
1377 	cpudata_size = 100;
1378 	cpudata = erts_alloc(ERTS_ALC_T_TMP,
1379 			     (sizeof(erts_cpu_topology_t)
1380 			      * cpudata_size));
1381 
1382 	while (is_list(list)) {
1383 	    Eterm *lp = list_val(list);
1384 	    Eterm cpu = CAR(lp);
1385 	    Eterm* tp;
1386 	    Sint id;
1387 
1388 	    if (is_not_tuple(cpu))
1389 		goto error;
1390 
1391 	    tp = tuple_val(cpu);
1392 
1393 	    if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
1394 		goto error;
1395 
1396 	    if (ix >= cpudata_size) {
1397 		cpudata_size += 100;
1398 		cpudata = erts_realloc(ERTS_ALC_T_TMP,
1399 				       cpudata,
1400 				       (sizeof(erts_cpu_topology_t)
1401 					* cpudata_size));
1402 	    }
1403 
1404 	    id = signed_val(tp[2]);
1405 	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
1406 		goto error;
1407 	    cpudata[ix].node = (int) id;
1408 
1409 	    id = signed_val(tp[3]);
1410 	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
1411 		goto error;
1412 	    cpudata[ix].processor = (int) id;
1413 
1414 	    id = signed_val(tp[4]);
1415 	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
1416 		goto error;
1417 	    cpudata[ix].processor_node = (int) id;
1418 
1419 	    id = signed_val(tp[5]);
1420 	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
1421 		goto error;
1422 	    cpudata[ix].core = (int) id;
1423 
1424 	    id = signed_val(tp[6]);
1425 	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
1426 		goto error;
1427 	    cpudata[ix].thread = (int) id;
1428 
1429 	    id = signed_val(tp[7]);
1430 	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
1431 		goto error;
1432 	    cpudata[ix].logical = (int) id;
1433 
1434 	    list = CDR(lp);
1435 	    ix++;
1436 	}
1437 
1438 	if (is_not_nil(list))
1439 	    goto error;
1440 
1441 	cpudata_size = ix;
1442 
1443 	if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
1444 	    goto error;
1445 
1446 	if (user_cpudata_size != cpudata_size) {
1447 	    if (user_cpudata)
1448 		erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
1449 	    user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
1450 				      sizeof(erts_cpu_topology_t)*cpudata_size);
1451 	    user_cpudata_size = cpudata_size;
1452 	}
1453 
1454 	sys_memcpy((void *) user_cpudata,
1455 		   (void *) cpudata,
1456 		   sizeof(erts_cpu_topology_t)*cpudata_size);
1457     }
1458 
1459     update_cpu_groups_maps();
1460 
1461     write_schedulers_bind_change(cpudata, cpudata_size);
1462 
1463     erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
1464     erts_sched_notify_check_cpu_bind();
1465 
1466  done:
1467 
1468     if (cpudata)
1469 	erts_free(ERTS_ALC_T_TMP, cpudata);
1470 
1471     return res;
1472 }
1473 
1474 static void
1475 create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
1476 {
1477     if (user_cpudata) {
1478 	*cpudata_size = user_cpudata_size;
1479 	*cpudata = erts_alloc(ERTS_ALC_T_TMP,
1480 			      (sizeof(erts_cpu_topology_t)
1481 			       * (*cpudata_size)));
1482 	sys_memcpy((void *) *cpudata,
1483 		   (void *) user_cpudata,
1484 		   sizeof(erts_cpu_topology_t)*(*cpudata_size));
1485     }
1486     else if (system_cpudata) {
1487 	*cpudata_size = system_cpudata_size;
1488 	*cpudata = erts_alloc(ERTS_ALC_T_TMP,
1489 			      (sizeof(erts_cpu_topology_t)
1490 			       * (*cpudata_size)));
1491 	sys_memcpy((void *) *cpudata,
1492 		   (void *) system_cpudata,
1493 		   sizeof(erts_cpu_topology_t)*(*cpudata_size));
1494     }
1495     else {
1496 	*cpudata = NULL;
1497 	*cpudata_size = 0;
1498     }
1499 }
1500 
1501 static void
1502 destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
1503 {
1504     if (cpudata)
1505 	erts_free(ERTS_ALC_T_TMP, cpudata);
1506 }
1507 
1508 
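/*
 * Build the topology term (a list of {cpu, ...} 7-tuples) using the usual
 * two pass erts_bld_*() convention: when hpp is NULL only the required heap
 * size is accumulated in hszp; the second pass writes the term.
 */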
1509 static Eterm
1510 bld_topology_term(Eterm **hpp,
1511 		  Uint *hszp,
1512 		  erts_cpu_topology_t *cpudata,
1513 		  int size)
1514 {
1515     Eterm res = NIL;
1516     int i;
1517 
1518     if (size == 0)
1519 	return am_undefined;
1520 
1521     for (i = size-1; i >= 0; i--) {
1522 	res = erts_bld_cons(hpp,
1523 			    hszp,
1524 			    erts_bld_tuple(hpp,
1525 					   hszp,
1526 					   7,
1527 					   am_cpu,
1528 					   make_small(cpudata[i].node),
1529 					   make_small(cpudata[i].processor),
1530 					   make_small(cpudata[i].processor_node),
1531 					   make_small(cpudata[i].core),
1532 					   make_small(cpudata[i].thread),
1533 					   make_small(cpudata[i].logical)),
1534 			    res);
1535     }
1536     return res;
1537 }
1538 
1539 static Eterm
1540 get_cpu_topology_term(Process *c_p, int type)
1541 {
1542 #ifdef DEBUG
1543     Eterm *hp_end;
1544 #endif
1545     Eterm *hp;
1546     Uint hsz;
1547     Eterm res = THE_NON_VALUE;
1548     erts_cpu_topology_t *cpudata = NULL;
1549     int size = 0;
1550 
1551     switch (type) {
1552     case ERTS_GET_USED_CPU_TOPOLOGY:
1553 	if (user_cpudata)
1554 	    goto defined;
1555 	else
1556 	    goto detected;
1557     case ERTS_GET_DETECTED_CPU_TOPOLOGY:
1558     detected:
1559 	if (!system_cpudata)
1560 	    res = am_undefined;
1561 	else {
1562 	    size = system_cpudata_size;
1563 	    cpudata = erts_alloc(ERTS_ALC_T_TMP,
1564 				 (sizeof(erts_cpu_topology_t)
1565 				  * size));
1566 	    sys_memcpy((void *) cpudata,
1567 		       (void *) system_cpudata,
1568 		       sizeof(erts_cpu_topology_t)*size);
1569 	}
1570 	break;
1571     case ERTS_GET_DEFINED_CPU_TOPOLOGY:
1572     defined:
1573 	if (!user_cpudata)
1574 	    res = am_undefined;
1575 	else {
1576 	    size = user_cpudata_size;
1577 	    cpudata = user_cpudata;
1578 	}
1579 	break;
1580     default:
1581 	erts_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
1582 	break;
1583     }
1584 
1585     if (res == am_undefined) {
1586 	ASSERT(!cpudata);
1587 	return res;
1588     }
1589 
1590     hsz = 0;
1591 
1592     bld_topology_term(NULL, &hsz,
1593 		      cpudata, size);
1594 
1595     hp = HAlloc(c_p, hsz);
1596 
1597 #ifdef DEBUG
1598     hp_end = hp + hsz;
1599 #endif
1600 
1601     res = bld_topology_term(&hp, NULL,
1602 			    cpudata, size);
1603 
1604     ASSERT(hp_end == hp);
1605 
1606     if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
1607 	erts_free(ERTS_ALC_T_TMP, cpudata);
1608 
1609     return res;
1610 }
1611 
1612 Eterm
1613 erts_get_cpu_topology_term(Process *c_p, Eterm which)
1614 {
1615     Eterm res;
1616     int type;
1617     erts_rwmtx_rlock(&cpuinfo_rwmtx);
1618     if (ERTS_IS_ATOM_STR("used", which))
1619 	type = ERTS_GET_USED_CPU_TOPOLOGY;
1620     else if (ERTS_IS_ATOM_STR("detected", which))
1621 	type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
1622     else if (ERTS_IS_ATOM_STR("defined", which))
1623 	type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
1624     else
1625 	type = ERTS_GET_CPU_TOPOLOGY_ERROR;
1626     if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
1627 	res = THE_NON_VALUE;
1628     else
1629 	res = get_cpu_topology_term(c_p, type);
1630     erts_rwmtx_runlock(&cpuinfo_rwmtx);
1631     return res;
1632 }
1633 
1634 static void
1635 get_logical_processors(int *conf, int *onln, int *avail)
1636 {
1637     if (conf)
1638 	*conf = erts_get_cpu_configured(cpuinfo);
1639     if (onln)
1640 	*onln = erts_get_cpu_online(cpuinfo);
1641     if (avail)
1642 	*avail = erts_get_cpu_available(cpuinfo);
1643 }
1644 
1645 void
1646 erts_get_logical_processors(int *conf, int *onln, int *avail)
1647 {
1648     erts_rwmtx_rlock(&cpuinfo_rwmtx);
1649     get_logical_processors(conf, onln, avail);
1650     erts_rwmtx_runlock(&cpuinfo_rwmtx);
1651 }
1652 
1653 void
1654 erts_pre_early_init_cpu_topology(int *max_dcg_p,
1655                                  int *max_rg_p,
1656 				 int *conf_p,
1657 				 int *onln_p,
1658 				 int *avail_p)
1659 {
1660     cpu_groups_maps = NULL;
1661     no_cpu_groups_callbacks = 0;
1662     *max_rg_p = ERTS_MAX_READER_GROUPS;
1663     *max_dcg_p = ERTS_MAX_FLXCTR_GROUPS;
1664     cpuinfo = erts_cpu_info_create();
1665     get_logical_processors(conf_p, onln_p, avail_p);
1666 }
1667 
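/*
 * Early topology initialization: fetch the detected topology from cpuinfo,
 * limit the number of "main" threads to the number of configured cpus (but
 * never more than the number of schedulers), and derive the number of
 * rwmtx reader groups and decentralized (flxctr) counter groups from it.
 */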
1668 void
1669 erts_early_init_cpu_topology(int no_schedulers,
1670 			     int *max_main_threads_p,
1671 			     int max_reader_groups,
1672 			     int *reader_groups_p,
1673                              int max_decentralized_counter_groups,
1674                              int *decentralized_counter_groups_p)
1675 {
1676     user_cpudata = NULL;
1677     user_cpudata_size = 0;
1678 
1679     system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
1680     system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
1681 				(sizeof(erts_cpu_topology_t)
1682 				 * system_cpudata_size));
1683 
1684     cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
1685 
1686     if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
1687 	|| ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
1688 							system_cpudata_size)) {
1689 	erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
1690 	system_cpudata = NULL;
1691 	system_cpudata_size = 0;
1692     }
1693 
1694     max_main_threads = erts_get_cpu_configured(cpuinfo);
1695     if (max_main_threads > no_schedulers || max_main_threads < 0)
1696 	max_main_threads = no_schedulers;
1697     *max_main_threads_p = max_main_threads;
1698 
1699     decentralized_counter_groups = max_main_threads;
1700     if (decentralized_counter_groups <= 1 || max_decentralized_counter_groups <= 1)
1701 	decentralized_counter_groups = 1;
1702     if (decentralized_counter_groups > max_decentralized_counter_groups)
1703 	decentralized_counter_groups = max_decentralized_counter_groups;
1704     *decentralized_counter_groups_p = decentralized_counter_groups;
1705     reader_groups = max_main_threads;
1706     if (reader_groups <= 1 || max_reader_groups <= 1)
1707 	reader_groups = 0;
1708     if (reader_groups > max_reader_groups)
1709 	reader_groups = max_reader_groups;
1710     *reader_groups_p = reader_groups;
1711 }
1712 
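/*
 * Final topology initialization: create the scheduler to cpu map, register
 * the cpu groups maps used for reader groups and flxctr groups, and, if a
 * bind order was selected at system start, compute and announce the
 * initial scheduler bindings.
 */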
1713 void
1714 erts_init_cpu_topology(void)
1715 {
1716     int ix;
1717 
1718     erts_rwmtx_init(&cpuinfo_rwmtx, "cpu_info", NIL,
1719         ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
1720     erts_rwmtx_rwlock(&cpuinfo_rwmtx);
1721 
1722     scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
1723 				   (sizeof(ErtsCpuBindData)
1724 				    * (erts_no_schedulers+1)));
1725     for (ix = 1; ix <= erts_no_schedulers; ix++) {
1726 	scheduler2cpu_map[ix].bind_id = -1;
1727 	scheduler2cpu_map[ix].bound_id = -1;
1728     }
1729 
1730     if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED)
1731 	cpu_bind_order = ERTS_CPU_BIND_NONE;
1732 
1733     reader_groups_map = add_cpu_groups(reader_groups,
1734 				       reader_groups_callback,
1735 				       NULL);
1736     decentralized_counter_groups_map = add_cpu_groups(decentralized_counter_groups,
1737                                                       flxctr_groups_callback,
1738                                                       NULL);
1739 
1740     if (cpu_bind_order == ERTS_CPU_BIND_NONE)
1741 	erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
1742     else {
1743 	erts_cpu_topology_t *cpudata;
1744 	int cpudata_size;
1745 	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
1746 	write_schedulers_bind_change(cpudata, cpudata_size);
1747 	erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
1748 	erts_sched_notify_check_cpu_bind();
1749 	destroy_tmp_cpu_topology_copy(cpudata);
1750     }
1751 }
1752 
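/*
 * Re-reads the CPU information from the system. If anything changed,
 * the cached system topology and all cpu groups maps are rebuilt and
 * the schedulers are asked to re-check their bindings. Returns a
 * non-zero value when a change was detected.
 */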
int
erts_update_cpu_info(void)
{
    int changed;
    erts_rwmtx_rwlock(&cpuinfo_rwmtx);
    changed = erts_cpu_info_update(cpuinfo);
    if (changed) {
        erts_cpu_topology_t *cpudata;
        int cpudata_size;

        if (system_cpudata)
            erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);

        system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
        if (!system_cpudata_size)
            system_cpudata = NULL;
        else {
            system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
                                        (sizeof(erts_cpu_topology_t)
                                         * system_cpudata_size));

            if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
                || (ERTS_INIT_CPU_TOPOLOGY_OK
                    != verify_topology(system_cpudata,
                                       system_cpudata_size))) {
                erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
                system_cpudata = NULL;
                system_cpudata_size = 0;
            }
        }

        update_cpu_groups_maps();

        create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
        write_schedulers_bind_change(cpudata, cpudata_size);
        destroy_tmp_cpu_topology_copy(cpudata);
    }
    erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
    if (changed)
        erts_sched_notify_check_cpu_bind();
    return changed;
}

/*
 * reader groups map
 */

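/*
 * CPU groups callback for the rwmtx reader groups map. Schedulers
 * numbered within max_main_threads are assigned reader group group+1;
 * a suspending scheduler falls back to group 0.
 */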
void
reader_groups_callback(int suspending,
                       ErtsSchedulerData *esdp,
                       int group,
                       void *unused)
{
    if (reader_groups && esdp->no <= max_main_threads)
        erts_rwmtx_set_reader_group(suspending ? 0 : group+1);
}

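/*
 * CPU groups callback for the decentralized counter (flxctr) groups
 * map. Assigns counter slot group+1, or slot 0 when suspending.
 */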
void
flxctr_groups_callback(int suspending,
                       ErtsSchedulerData *esdp,
                       int group,
                       void *unused)
{
    erts_flxctr_set_slot(suspending ? 0 : group+1);
}

static Eterm get_cpu_groups_map(Process *c_p,
                                erts_cpu_groups_map_t *map,
                                int offset);
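
/*
 * Debug helper: builds a temporary cpu groups map for the given number
 * of groups (ignoring which cpus are currently available) and returns
 * it as an Erlang term, without touching the real reader groups map.
 */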
Eterm
erts_debug_reader_groups_map(Process *c_p, int groups)
{
    Eterm res;
    erts_cpu_groups_map_t test;

    test.array = NULL;
    test.groups = groups;
    make_cpu_groups_map(&test, 1);
    if (!test.array)
        res = NIL;
    else {
        res = get_cpu_groups_map(c_p, &test, 1);
        erts_free(ERTS_ALC_T_TMP, test.array);
    }
    return res;
}

Eterm
erts_get_reader_groups_map(Process *c_p)
{
    Eterm res;
    erts_rwmtx_rlock(&cpuinfo_rwmtx);
    res = get_cpu_groups_map(c_p, reader_groups_map, 1);
    erts_rwmtx_runlock(&cpuinfo_rwmtx);
    return res;
}

Eterm
erts_get_decentralized_counter_groups_map(Process *c_p)
{
    Eterm res;
    erts_rwmtx_rlock(&cpuinfo_rwmtx);
    res = get_cpu_groups_map(c_p, decentralized_counter_groups_map, 1);
    erts_rwmtx_runlock(&cpuinfo_rwmtx);
    return res;
}

/*
 * CPU groups
 */

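/*
 * Builds the Erlang term representation of a cpu groups map: a list of
 * {LogicalCpuId, CpuGroup} two-tuples, with the group numbers shifted
 * by 'offset'.
 */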
static Eterm
get_cpu_groups_map(Process *c_p,
                   erts_cpu_groups_map_t *map,
                   int offset)
{
#ifdef DEBUG
    Eterm *endp;
#endif
    Eterm res = NIL, tuple;
    Eterm *hp;
    int i;

    hp = HAlloc(c_p, map->logical_processors*(2+3));
#ifdef DEBUG
    endp = hp + map->logical_processors*(2+3);
#endif
    for (i = map->size - 1; i >= 0; i--) {
        if (map->array[i].logical >= 0) {
            tuple = TUPLE2(hp,
                           make_small(map->array[i].logical),
                           make_small(map->array[i].cpu_group + offset));
            hp += 3;
            res = CONS(hp, tuple, res);
            hp += 2;
        }
    }
    ASSERT(hp == endp);
    return res;
}

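/*
 * Filters the topology down to the logical CPUs that are actually
 * available (unless 'test' is set) and renumbers each topology level
 * with consecutive ids. On return, 'no' holds the number of distinct
 * entities per level and 'size' the number of available logical CPUs
 * written to 'avail'.
 */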
static void
make_available_cpu_topology(erts_avail_cput *no,
                            erts_avail_cput *avail,
                            erts_cpu_topology_t *cpudata,
                            int *size,
                            int test)
{
    int len = *size;
    erts_cpu_topology_t last;
    int a, i, j;

    no->level[ERTS_TOPOLOGY_NODE] = -1;
    no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
    no->level[ERTS_TOPOLOGY_CORE] = -1;
    no->level[ERTS_TOPOLOGY_THREAD] = -1;
    no->level[ERTS_TOPOLOGY_LOGICAL] = -1;

    last.node = INT_MIN;
    last.processor = INT_MIN;
    last.processor_node = INT_MIN;
    last.core = INT_MIN;
    last.thread = INT_MIN;
    last.logical = INT_MIN;

    a = 0;

    for (i = 0; i < len; i++) {

        if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical))
            continue;

        if (last.node != cpudata[i].node)
            goto node;
        if (last.processor != cpudata[i].processor)
            goto processor;
        if (last.processor_node != cpudata[i].processor_node)
            goto processor_node;
        if (last.core != cpudata[i].core)
            goto core;
        ASSERT(last.thread != cpudata[i].thread);
        goto thread;

    node:
        no->level[ERTS_TOPOLOGY_NODE]++;
    processor:
        no->level[ERTS_TOPOLOGY_PROCESSOR]++;
    processor_node:
        no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
    core:
        no->level[ERTS_TOPOLOGY_CORE]++;
    thread:
        no->level[ERTS_TOPOLOGY_THREAD]++;

        no->level[ERTS_TOPOLOGY_LOGICAL]++;

        for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
            avail[a].level[j] = no->level[j];

        avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
        avail[a].level[ERTS_TOPOLOGY_CG] = 0;

        ASSERT(last.logical != cpudata[i].logical);

        last = cpudata[i];
        a++;
    }

    no->level[ERTS_TOPOLOGY_NODE]++;
    no->level[ERTS_TOPOLOGY_PROCESSOR]++;
    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
    no->level[ERTS_TOPOLOGY_CORE]++;
    no->level[ERTS_TOPOLOGY_THREAD]++;
    no->level[ERTS_TOPOLOGY_LOGICAL]++;

    *size = a;
}

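/*
 * Inserts a logical cpu -> cpu group mapping into the map's array
 * using open addressing with linear probing. The array is sized to
 * more than twice the number of logical processors, so it should
 * never fill up; if it does, we abort.
 */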
static void
cpu_group_insert(erts_cpu_groups_map_t *map,
                 int logical, int cpu_group)
{
    int start = logical % map->size;
    int ix = start;

    do {
        if (map->array[ix].logical < 0) {
            map->array[ix].logical = logical;
            map->array[ix].cpu_group = cpu_group;
            return;
        }
        ix++;
        if (ix == map->size)
            ix = 0;
    } while (ix != start);

    erts_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
}

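/*
 * Counts the distinct sub-level entities (level+1) belonging to the
 * topology entity cgc->id at 'level', scanning 'avail' from index
 * 'aix'. Returns the index of the first entry belonging to the next
 * entity at 'level'.
 */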
static int
sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix,
           int avail_sz, erts_avail_cput *avail)
{
    int sub_level = level+1;
    int last = -1;
    cgc->sub_levels = 0;

    do {
        if (last != avail[aix].level[sub_level]) {
            cgc->sub_levels++;
            last = avail[aix].level[sub_level];
        }
        aix++;
    } while (aix < avail_sz && cgc->id == avail[aix].level[level]);
    cgc->cpu_groups = 0;
    return aix;
}

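/*
 * Assigns cpu group numbers to all logical CPUs under one top-level
 * entity, spreading its sub-level entities as evenly as possible over
 * cgcp->cpu_groups groups (the last groups get one extra sub-level
 * when the division is uneven). Returns the index of the first entry
 * of the next top-level entity.
 */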
static int
write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp,
                 int level, int a,
                 int avail_sz, erts_avail_cput *avail)
{
    int cg = *cgp;
    int sub_level = level+1;
    int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups;
    int xsl = cgcp->sub_levels % cgcp->cpu_groups;
    int sls = 0;
    int last = -1;
    int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1;

    ASSERT(level < 0 || avail[a].level[level] == cgcp->id);

    do {
        if (last != avail[a].level[sub_level]) {
            if (!sls) {
                sls = sl_per_gr;
                cg++;
                if (cg >= xsl_cg_lim)
                    sls++;
            }
            last = avail[a].level[sub_level];
            sls--;
        }
        avail[a].level[ERTS_TOPOLOGY_CG] = cg;
        a++;
    } while (a < avail_sz && (level < 0
                              || avail[a].level[level] == cgcp->id));

    ASSERT(cgcp->cpu_groups == cg - *cgp);

    *cgp = cg;

    return a;
}

static int
cg_count_sub_levels_compare(const void *vx, const void *vy)
{
    erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
    erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
    if (x->sub_levels != y->sub_levels)
        return y->sub_levels - x->sub_levels;
    return x->id - y->id;
}

static int
cg_count_id_compare(const void *vx, const void *vy)
{
    erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
    erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
    return x->id - y->id;
}

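/*
 * (Re)builds a cpu groups map: picks the topology level at which to
 * spread the groups (the first level with more entities than requested
 * groups, defaulting to the core level), then either maps entities at
 * that level directly to groups, or splits the groups over the
 * top-level entities in proportion to their number of sub-level
 * entities.
 *
 * Illustrative example (hypothetical machine, not taken from this
 * code): with 2 nodes of 4 cores each and map->groups == 4, the spread
 * level becomes the core level, each node gets 2 groups, and the cores
 * of node 0 map to groups 0-1 while the cores of node 1 map to groups
 * 2-3.
 */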
static void
make_cpu_groups_map(erts_cpu_groups_map_t *map, int test)
{
    int i, spread_level, avail_sz;
    erts_avail_cput no, *avail;
    erts_cpu_topology_t *cpudata;
    ErtsAlcType_t alc_type = (test
                              ? ERTS_ALC_T_TMP
                              : ERTS_ALC_T_CPU_GRPS_MAP);

    if (map->array)
        erts_free(alc_type, map->array);

    map->array = NULL;
    map->logical_processors = 0;
    map->size = 0;

    if (!map->groups)
        return;

    create_tmp_cpu_topology_copy(&cpudata, &avail_sz);

    if (!cpudata)
        return;

    cpu_bind_order_sort(cpudata,
                        avail_sz,
                        ERTS_CPU_BIND_NO_SPREAD,
                        1);

    avail = erts_alloc(ERTS_ALC_T_TMP,
                       sizeof(erts_avail_cput)*avail_sz);

    make_available_cpu_topology(&no, avail, cpudata,
                                &avail_sz, test);

    destroy_tmp_cpu_topology_copy(cpudata);

    map->size = avail_sz*2+1;

    map->array = erts_alloc(alc_type,
                            (sizeof(erts_cpu_groups_map_array_t)
                             * map->size));
    map->logical_processors = avail_sz;

    for (i = 0; i < map->size; i++) {
        map->array[i].logical = -1;
        map->array[i].cpu_group = -1;
    }

    spread_level = ERTS_TOPOLOGY_CORE;
    for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
        if (no.level[i] > map->groups) {
            spread_level = i;
            break;
        }
    }

    if (no.level[spread_level] <= map->groups) {
        int a, cg, last = -1;
        cg = -1;
        ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
        for (a = 0; a < avail_sz; a++) {
            if (last != avail[a].level[spread_level]) {
                cg++;
                last = avail[a].level[spread_level];
            }
            cpu_group_insert(map,
                             avail[a].level[ERTS_TOPOLOGY_LOGICAL],
                             cg);
        }
    }
    else { /* map->groups < no.level[spread_level] */
        erts_cpu_groups_count_t *cg_count;
        int a, cg, tl, toplevels;

        tl = spread_level-1;

        if (spread_level == ERTS_TOPOLOGY_NODE)
            toplevels = 1;
        else
            toplevels = no.level[tl];

        cg_count = erts_alloc(ERTS_ALC_T_TMP,
                              toplevels*sizeof(erts_cpu_groups_count_t));

        if (toplevels == 1) {
            cg_count[0].id = 0;
            cg_count[0].sub_levels = no.level[spread_level];
            cg_count[0].cpu_groups = map->groups;
        }
        else {
            int cgs_per_tl, cgs;
            cgs = map->groups;
            cgs_per_tl = cgs / toplevels;

            a = 0;
            for (i = 0; i < toplevels; i++) {
                cg_count[i].id = avail[a].level[tl];
                a = sub_levels(&cg_count[i], tl, a, avail_sz, avail);
            }

            qsort(cg_count,
                  toplevels,
                  sizeof(erts_cpu_groups_count_t),
                  cg_count_sub_levels_compare);

            for (i = 0; i < toplevels; i++) {
                if (cg_count[i].sub_levels < cgs_per_tl) {
                    cg_count[i].cpu_groups = cg_count[i].sub_levels;
                    cgs -= cg_count[i].sub_levels;
                }
                else {
                    cg_count[i].cpu_groups = cgs_per_tl;
                    cgs -= cgs_per_tl;
                }
            }

            while (cgs > 0) {
                for (i = 0; i < toplevels; i++) {
                    if (cg_count[i].sub_levels == cg_count[i].cpu_groups)
                        break;
                    else {
                        cg_count[i].cpu_groups++;
                        if (--cgs == 0)
                            break;
                    }
                }
            }

            qsort(cg_count,
                  toplevels,
                  sizeof(erts_cpu_groups_count_t),
                  cg_count_id_compare);
        }

        a = i = 0;
        cg = -1;
        while (a < avail_sz) {
            a = write_cpu_groups(&cg, &cg_count[i], tl,
                                 a, avail_sz, avail);
            i++;
        }

        ASSERT(map->groups == cg + 1);

        for (a = 0; a < avail_sz; a++)
            cpu_group_insert(map,
                             avail[a].level[ERTS_TOPOLOGY_LOGICAL],
                             avail[a].level[ERTS_TOPOLOGY_CG]);

        erts_free(ERTS_ALC_T_TMP, cg_count);
    }

    erts_free(ERTS_ALC_T_TMP, avail);
}

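/*
 * Registers a callback for a cpu groups map with 'groups' groups
 * (capped at max_main_threads). An existing map with the same number
 * of groups is reused; otherwise a new map is built. Returns NULL when
 * no groups are requested.
 */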
static erts_cpu_groups_map_t *
add_cpu_groups(int groups,
               erts_cpu_groups_callback_t callback,
               void *arg)
{
    int use_groups = groups;
    erts_cpu_groups_callback_list_t *cgcl;
    erts_cpu_groups_map_t *cgm;

    ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));

    if (use_groups > max_main_threads)
        use_groups = max_main_threads;

    if (!use_groups)
        return NULL;

    no_cpu_groups_callbacks++;
    cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
                      sizeof(erts_cpu_groups_callback_list_t));
    cgcl->callback = callback;
    cgcl->arg = arg;

    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
        if (cgm->groups == use_groups) {
            cgcl->next = cgm->callback_list;
            cgm->callback_list = cgcl;
            return cgm;
        }
    }

    cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
                     sizeof(erts_cpu_groups_map_t));
    cgm->next = cpu_groups_maps;
    cgm->groups = use_groups;
    cgm->array = NULL;
    cgm->size = 0;
    cgm->logical_processors = 0;
    cgm->callback_list = cgcl;

    cgcl->next = NULL;

    make_cpu_groups_map(cgm, 0);

    cpu_groups_maps = cgm;

    return cgm;
}

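/*
 * Looks up the cpu group of a scheduler. Schedulers that are not bound
 * to a logical CPU are distributed round-robin over the groups by
 * scheduler number; bound schedulers are looked up in the map's array
 * using the same linear probing scheme as cpu_group_insert().
 */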
static int
cpu_groups_lookup(erts_cpu_groups_map_t *map,
                  ErtsSchedulerData *esdp)
{
    int start, logical, ix;

    ERTS_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
                   || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));

    if (esdp->cpu_id < 0)
        return (((int) esdp->no) - 1) % map->groups;

    logical = esdp->cpu_id;
    start = logical % map->size;
    ix = start;

    do {
        if (map->array[ix].logical == logical) {
            int group = map->array[ix].cpu_group;
            ASSERT(0 <= group && group < map->groups);
            return group;
        }
        ix++;
        if (ix == map->size)
            ix = 0;
    } while (ix != start);

    erts_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
}

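/*
 * Rebuilds all registered cpu groups maps after a topology change.
 */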
static void
update_cpu_groups_maps(void)
{
    erts_cpu_groups_map_t *cgm;
    ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));

    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next)
        make_cpu_groups_map(cgm, 0);
}