1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB 2010-2020. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20
21 /*
22 * Description: CPU topology and related functionality
23 *
24 * Author: Rickard Green
25 */
26
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30
31 #include <ctype.h>
32
33 #include "global.h"
34 #include "error.h"
35 #include "bif.h"
36 #include "erl_cpu_topology.h"
37 #include "erl_flxctr.h"
38
39 #define ERTS_MAX_READER_GROUPS 64
40
41 /*
42 * Cpu topology hierarchy.
43 */
44 #define ERTS_TOPOLOGY_NODE 0
45 #define ERTS_TOPOLOGY_PROCESSOR 1
46 #define ERTS_TOPOLOGY_PROCESSOR_NODE 2
47 #define ERTS_TOPOLOGY_CORE 3
48 #define ERTS_TOPOLOGY_THREAD 4
49 #define ERTS_TOPOLOGY_LOGICAL 5
50
51 #define ERTS_TOPOLOGY_MAX_DEPTH 6
52
/*
 * Per-scheduler CPU binding state; one entry per scheduler in
 * scheduler2cpu_map (indexed by scheduler number).
 */
typedef struct {
    int bind_id;    /* logical cpu id the scheduler should bind to; -1 = none */
    int bound_id;   /* logical cpu id the scheduler is currently bound to; -1 = unbound */
} ErtsCpuBindData;
57
/* Handle to system cpu information; all state below is protected by
 * cpuinfo_rwmtx. */
static erts_cpu_info_t *cpuinfo;

/* Highest scheduler number running in a "main thread"; compared against
 * esdp->no when toggling erts_thr_set_main_status(). */
static int max_main_threads;
/* Number of reader groups (presumably for reader-optimized rwlocks,
 * cf. ERTS_MAX_READER_GROUPS — confirm against callers). */
static int reader_groups;
/* Number of groups used by decentralized counters (erl_flxctr). */
static int decentralized_counter_groups;

/* Binding state per scheduler, indexed 1..erts_no_schedulers. */
static ErtsCpuBindData *scheduler2cpu_map;
/* Protects cpuinfo, scheduler2cpu_map, cpu_bind_order, the cpu groups
 * maps and the user/system topology data below. */
static erts_rwmtx_t cpuinfo_rwmtx;

/* The order in which schedulers are spread over the cpu topology when
 * they are bound; see the cpu_*_order_cmp() comparators. */
typedef enum {
    ERTS_CPU_BIND_UNDEFINED,
    ERTS_CPU_BIND_SPREAD,
    ERTS_CPU_BIND_PROCESSOR_SPREAD,
    ERTS_CPU_BIND_THREAD_SPREAD,
    ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
    ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
    ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
    ERTS_CPU_BIND_NO_SPREAD,
    ERTS_CPU_BIND_NONE
} ErtsCpuBindOrder;

#define ERTS_CPU_BIND_DEFAULT_BIND \
  ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD

/* Total number of callbacks registered over all cpu groups maps; used to
 * size the temporary callback-call arrays. */
static int no_cpu_groups_callbacks;
static ErtsCpuBindOrder cpu_bind_order;

/* User-specified cpu topology (NULL if none given). */
static erts_cpu_topology_t *user_cpudata;
static int user_cpudata_size;
/* Automatically detected cpu topology (NULL if detection failed). */
static erts_cpu_topology_t *system_cpudata;
static int system_cpudata_size;
89
/* Scratch id path while walking the topology hierarchy; one slot per
 * level plus the cpu-group pseudo level (ERTS_TOPOLOGY_CG). */
typedef struct {
    int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
} erts_avail_cput;

/* Book-keeping used while partitioning logical cpus into groups. */
typedef struct {
    int id;
    int sub_levels;
    int cpu_groups;
} erts_cpu_groups_count_t;

/* One "logical cpu -> cpu group" mapping entry. */
typedef struct {
    int logical;
    int cpu_group;
} erts_cpu_groups_map_array_t;

/* Callback registered on a cpu groups map; invoked from the
 * *_check_cpu_bind functions when bindings/groups may have changed. */
typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t;
struct erts_cpu_groups_callback_list_t_ {
    erts_cpu_groups_callback_list_t *next;
    erts_cpu_groups_callback_t callback;
    void *arg;
};

/* A partitioning of logical processors into 'groups' cpu groups,
 * together with the callbacks interested in it. */
typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t;
struct erts_cpu_groups_map_t_ {
    erts_cpu_groups_map_t *next;
    int groups;                  /* number of groups in this map */
    erts_cpu_groups_map_array_t *array;
    int size;                    /* number of entries in array */
    int logical_processors;
    erts_cpu_groups_callback_list_t *callback_list;
};

/* Snapshot of a single callback invocation; collected while holding
 * cpuinfo_rwmtx, invoked after the lock has been released. */
typedef struct {
    erts_cpu_groups_callback_t callback;
    int ix;
    void *arg;
} erts_cpu_groups_callback_call_t;

/* All cpu groups maps (singly linked list). */
static erts_cpu_groups_map_t *cpu_groups_maps;

static erts_cpu_groups_map_t *reader_groups_map;

static erts_cpu_groups_map_t *decentralized_counter_groups_map;

#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH

#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)

/* Forward declarations. */
static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
                                int size,
                                ErtsCpuBindOrder bind_order,
                                int mk_seq);
static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);

static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
static void flxctr_groups_callback(int, ErtsSchedulerData *, int, void *);
static erts_cpu_groups_map_t *add_cpu_groups(int groups,
                                             erts_cpu_groups_callback_t callback,
                                             void *arg);
static void update_cpu_groups_maps(void);
static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test);
static int cpu_groups_lookup(erts_cpu_groups_map_t *map,
                             ErtsSchedulerData *esdp);

static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
                                         int *cpudata_size);
static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
157
/* qsort comparator for plain ints (ascending). */
static int
int_cmp(const void *vx, const void *vy)
{
    const int a = *(const int *) vx;
    const int b = *(const int *) vy;
    return a - b;
}
163
164 static int
cpu_spread_order_cmp(const void * vx,const void * vy)165 cpu_spread_order_cmp(const void *vx, const void *vy)
166 {
167 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
168 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
169
170 if (x->thread != y->thread)
171 return x->thread - y->thread;
172 if (x->core != y->core)
173 return x->core - y->core;
174 if (x->processor_node != y->processor_node)
175 return x->processor_node - y->processor_node;
176 if (x->processor != y->processor)
177 return x->processor - y->processor;
178 if (x->node != y->node)
179 return x->node - y->node;
180 return 0;
181 }
182
183 static int
cpu_processor_spread_order_cmp(const void * vx,const void * vy)184 cpu_processor_spread_order_cmp(const void *vx, const void *vy)
185 {
186 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
187 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
188
189 if (x->thread != y->thread)
190 return x->thread - y->thread;
191 if (x->processor_node != y->processor_node)
192 return x->processor_node - y->processor_node;
193 if (x->core != y->core)
194 return x->core - y->core;
195 if (x->node != y->node)
196 return x->node - y->node;
197 if (x->processor != y->processor)
198 return x->processor - y->processor;
199 return 0;
200 }
201
202 static int
cpu_thread_spread_order_cmp(const void * vx,const void * vy)203 cpu_thread_spread_order_cmp(const void *vx, const void *vy)
204 {
205 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
206 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
207
208 if (x->thread != y->thread)
209 return x->thread - y->thread;
210 if (x->node != y->node)
211 return x->node - y->node;
212 if (x->processor != y->processor)
213 return x->processor - y->processor;
214 if (x->processor_node != y->processor_node)
215 return x->processor_node - y->processor_node;
216 if (x->core != y->core)
217 return x->core - y->core;
218 return 0;
219 }
220
221 static int
cpu_thread_no_node_processor_spread_order_cmp(const void * vx,const void * vy)222 cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
223 {
224 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
225 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
226
227 if (x->thread != y->thread)
228 return x->thread - y->thread;
229 if (x->node != y->node)
230 return x->node - y->node;
231 if (x->core != y->core)
232 return x->core - y->core;
233 if (x->processor != y->processor)
234 return x->processor - y->processor;
235 return 0;
236 }
237
238 static int
cpu_no_node_processor_spread_order_cmp(const void * vx,const void * vy)239 cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
240 {
241 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
242 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
243
244 if (x->node != y->node)
245 return x->node - y->node;
246 if (x->thread != y->thread)
247 return x->thread - y->thread;
248 if (x->core != y->core)
249 return x->core - y->core;
250 if (x->processor != y->processor)
251 return x->processor - y->processor;
252 return 0;
253 }
254
255 static int
cpu_no_node_thread_spread_order_cmp(const void * vx,const void * vy)256 cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
257 {
258 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
259 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
260
261 if (x->node != y->node)
262 return x->node - y->node;
263 if (x->thread != y->thread)
264 return x->thread - y->thread;
265 if (x->processor != y->processor)
266 return x->processor - y->processor;
267 if (x->core != y->core)
268 return x->core - y->core;
269 return 0;
270 }
271
272 static int
cpu_no_spread_order_cmp(const void * vx,const void * vy)273 cpu_no_spread_order_cmp(const void *vx, const void *vy)
274 {
275 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
276 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
277
278 if (x->node != y->node)
279 return x->node - y->node;
280 if (x->processor != y->processor)
281 return x->processor - y->processor;
282 if (x->processor_node != y->processor_node)
283 return x->processor_node - y->processor_node;
284 if (x->core != y->core)
285 return x->core - y->core;
286 if (x->thread != y->thread)
287 return x->thread - y->thread;
288 return 0;
289 }
290
/*
 * Renumber the topology id fields of cpudata[0..size-1] into dense
 * sequences starting at 0.  Assumes cpudata is ordered so that entries
 * with equal original ids at each level are adjacent (TODO confirm with
 * callers; cpu_bind_order_sort() calls this before qsort).
 *
 * When no_node is set, node and processor_node ids are folded into a
 * single shared node sequence; otherwise node and processor_node get
 * separate sequences and a missing processor_node (< 0) is forced to 0.
 * A change at an outer level restarts the sequences of all inner
 * levels.
 */
static ERTS_INLINE void
make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
{
    int ix;
    /* Next-id counters per level... */
    int node = -1;
    int processor = -1;
    int processor_node = -1;
    int processor_node_node = -1;
    int core = -1;
    int thread = -1;
    /* ...and the most recently seen original id per level. */
    int old_node = -1;
    int old_processor = -1;
    int old_processor_node = -1;
    int old_core = -1;
    int old_thread = -1;

    for (ix = 0; ix < size; ix++) {
        if (!no_node || cpudata[ix].node >= 0) {
            if (old_node == cpudata[ix].node)
                cpudata[ix].node = node;
            else {
                /* New node: restart all inner-level sequences. */
                old_node = cpudata[ix].node;
                old_processor = processor = -1;
                if (!no_node)
                    old_processor_node = processor_node = -1;
                old_core = core = -1;
                old_thread = thread = -1;
                if (no_node || cpudata[ix].node >= 0)
                    cpudata[ix].node = ++node;
            }
        }
        if (old_processor == cpudata[ix].processor)
            cpudata[ix].processor = processor;
        else {
            old_processor = cpudata[ix].processor;
            if (!no_node)
                processor_node_node = old_processor_node = processor_node = -1;
            old_core = core = -1;
            old_thread = thread = -1;
            cpudata[ix].processor = ++processor;
        }
        if (no_node && cpudata[ix].processor_node < 0)
            old_processor_node = -1;
        else {
            if (old_processor_node == cpudata[ix].processor_node) {
                if (no_node)
                    cpudata[ix].node = cpudata[ix].processor_node = node;
                else {
                    if (processor_node_node >= 0)
                        cpudata[ix].node = processor_node_node;
                    cpudata[ix].processor_node = processor_node;
                }
            }
            else {
                old_processor_node = cpudata[ix].processor_node;
                old_core = core = -1;
                old_thread = thread = -1;
                if (no_node)
                    cpudata[ix].node = cpudata[ix].processor_node = ++node;
                else {
                    cpudata[ix].node = processor_node_node = ++node;
                    cpudata[ix].processor_node = ++processor_node;
                }
            }
        }
        if (!no_node && cpudata[ix].processor_node < 0)
            cpudata[ix].processor_node = 0;
        if (old_core == cpudata[ix].core)
            cpudata[ix].core = core;
        else {
            old_core = cpudata[ix].core;
            old_thread = thread = -1;
            cpudata[ix].core = ++core;
        }
        if (old_thread == cpudata[ix].thread)
            cpudata[ix].thread = thread;
        else
            old_thread = cpudata[ix].thread = ++thread;
    }
}
371
372 static void
cpu_bind_order_sort(erts_cpu_topology_t * cpudata,int size,ErtsCpuBindOrder bind_order,int mk_seq)373 cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
374 int size,
375 ErtsCpuBindOrder bind_order,
376 int mk_seq)
377 {
378 if (size > 1) {
379 int no_node = 0;
380 int (*cmp_func)(const void *, const void *);
381 switch (bind_order) {
382 case ERTS_CPU_BIND_SPREAD:
383 cmp_func = cpu_spread_order_cmp;
384 break;
385 case ERTS_CPU_BIND_PROCESSOR_SPREAD:
386 cmp_func = cpu_processor_spread_order_cmp;
387 break;
388 case ERTS_CPU_BIND_THREAD_SPREAD:
389 cmp_func = cpu_thread_spread_order_cmp;
390 break;
391 case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
392 no_node = 1;
393 cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
394 break;
395 case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
396 no_node = 1;
397 cmp_func = cpu_no_node_processor_spread_order_cmp;
398 break;
399 case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
400 no_node = 1;
401 cmp_func = cpu_no_node_thread_spread_order_cmp;
402 break;
403 case ERTS_CPU_BIND_NO_SPREAD:
404 cmp_func = cpu_no_spread_order_cmp;
405 break;
406 default:
407 cmp_func = NULL;
408 erts_exit(ERTS_ABORT_EXIT,
409 "Bad cpu bind type: %d\n",
410 (int) cpu_bind_order);
411 break;
412 }
413
414 if (mk_seq)
415 make_cpudata_id_seq(cpudata, size, no_node);
416
417 qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
418 }
419 }
420
421 static int
processor_order_cmp(const void * vx,const void * vy)422 processor_order_cmp(const void *vx, const void *vy)
423 {
424 erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
425 erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
426
427 if (x->processor != y->processor)
428 return x->processor - y->processor;
429 if (x->node != y->node)
430 return x->node - y->node;
431 if (x->processor_node != y->processor_node)
432 return x->processor_node - y->processor_node;
433 if (x->core != y->core)
434 return x->core - y->core;
435 if (x->thread != y->thread)
436 return x->thread - y->thread;
437 return 0;
438 }
439
/*
 * Prepare scheduler esdp for suspension: unbind it from its cpu,
 * notify all registered cpu-groups callbacks (with first argument 1 —
 * presumably meaning "suspending"; confirm against callback
 * implementations), and drop main-thread status.
 *
 * Callback invocations are snapshotted under cpuinfo_rwmtx and invoked
 * only after the lock is released, presumably to avoid running
 * callbacks while holding the lock.
 */
void
erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp)
{
    erts_cpu_groups_map_t *cgm;
    erts_cpu_groups_callback_list_t *cgcl;
    erts_cpu_groups_callback_call_t *cgcc;
    int cgcc_ix;

    /* Unbind from cpu */
    erts_rwmtx_rwlock(&cpuinfo_rwmtx);
    if (scheduler2cpu_map[esdp->no].bound_id >= 0
        && erts_unbind_from_cpu(cpuinfo) == 0) {
        esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
    }

    /* Snapshot all registered callbacks while holding the lock. */
    cgcc = erts_alloc(ERTS_ALC_T_TMP,
                      (no_cpu_groups_callbacks
                       * sizeof(erts_cpu_groups_callback_call_t)));
    cgcc_ix = 0;
    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
        for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
            cgcc[cgcc_ix].callback = cgcl->callback;
            cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
            cgcc[cgcc_ix].arg = cgcl->arg;
            cgcc_ix++;
        }
    }
    ASSERT(no_cpu_groups_callbacks == cgcc_ix);
    erts_rwmtx_rwunlock(&cpuinfo_rwmtx);

    /* Invoke the snapshotted callbacks without holding the lock. */
    for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
        cgcc[cgcc_ix].callback(1,
                               esdp,
                               cgcc[cgcc_ix].ix,
                               cgcc[cgcc_ix].arg);

    erts_free(ERTS_ALC_T_TMP, cgcc);

    if (esdp->no <= max_main_threads)
        erts_thr_set_main_status(0, 0);

}
482
/*
 * Counterpart of erts_sched_check_cpu_bind_prep_suspend(); called with
 * the scheduler's run queue locked.  Restores main-thread status and
 * flags the run queue so the scheduler re-checks its cpu binding.
 */
void
erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp)
{
    ERTS_LC_ASSERT(erts_lc_runq_is_locked(esdp->run_queue));

    if (esdp->no <= max_main_threads)
        erts_thr_set_main_status(1, (int) esdp->no);

    /* Make sure we check if we should bind to a cpu or not... */
    (void) ERTS_RUNQ_FLGS_SET(esdp->run_queue, ERTS_RUNQ_FLG_CHK_CPU_BIND);
}
494
495
/*
 * (Re)apply the cpu binding of scheduler esdp according to
 * scheduler2cpu_map, then notify all registered cpu-groups callbacks
 * (first argument 0 — cf. the 1 passed from prep_suspend).
 *
 * Called with the run queue locked; the lock is dropped while
 * cpuinfo_rwmtx is held and re-taken before returning (presumably to
 * respect lock ordering — confirm against lock checker config).
 * Bind/unbind failures are reported to the error logger; a failed
 * re-bind falls through to unbinding from the previously bound cpu.
 */
void
erts_sched_check_cpu_bind(ErtsSchedulerData *esdp)
{
    int res, cpu_id, cgcc_ix;
    erts_cpu_groups_map_t *cgm;
    erts_cpu_groups_callback_list_t *cgcl;
    erts_cpu_groups_callback_call_t *cgcc;
    erts_runq_unlock(esdp->run_queue);
    erts_rwmtx_rwlock(&cpuinfo_rwmtx);
    cpu_id = scheduler2cpu_map[esdp->no].bind_id;
    if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
        res = erts_bind_to_cpu(cpuinfo, cpu_id);
        if (res == 0)
            esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
        else {
            erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
            erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
                          (int) esdp->no, cpu_id, erl_errno_id(-res));
            erts_send_error_to_logger_nogl(dsbufp);
            /* Bind failed but we are still bound somewhere else; get
             * rid of that stale binding. */
            if (scheduler2cpu_map[esdp->no].bound_id >= 0)
                goto unbind;
        }
    }
    else if (cpu_id < 0) {
    unbind:
        /* Get rid of old binding */
        res = erts_unbind_from_cpu(cpuinfo);
        if (res == 0)
            esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
        else if (res != -ENOTSUP) {
            erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
            erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
                          (int) esdp->no, cpu_id, erl_errno_id(-res));
            erts_send_error_to_logger_nogl(dsbufp);
        }
    }

    /* Snapshot all registered callbacks while holding the lock... */
    cgcc = erts_alloc(ERTS_ALC_T_TMP,
                      (no_cpu_groups_callbacks
                       * sizeof(erts_cpu_groups_callback_call_t)));
    cgcc_ix = 0;
    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
        for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
            cgcc[cgcc_ix].callback = cgcl->callback;
            cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
            cgcc[cgcc_ix].arg = cgcl->arg;
            cgcc_ix++;
        }
    }

    ASSERT(no_cpu_groups_callbacks == cgcc_ix);
    erts_rwmtx_rwunlock(&cpuinfo_rwmtx);

    /* ...and invoke them after releasing it. */
    for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
        cgcc[cgcc_ix].callback(0,
                               esdp,
                               cgcc[cgcc_ix].ix,
                               cgcc[cgcc_ix].arg);

    erts_free(ERTS_ALC_T_TMP, cgcc);

    erts_runq_lock(esdp->run_queue);
}
559
/*
 * Scheduler-startup variant of erts_sched_check_cpu_bind(): notifies
 * all registered cpu-groups callbacks of the scheduler's group indices
 * and sets main-thread status.  Does not touch any cpu binding, and
 * only takes a read lock on cpuinfo_rwmtx.
 */
void
erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp)
{
    int cgcc_ix;
    erts_cpu_groups_map_t *cgm;
    erts_cpu_groups_callback_list_t *cgcl;
    erts_cpu_groups_callback_call_t *cgcc;

    erts_rwmtx_rlock(&cpuinfo_rwmtx);

    /* Snapshot all registered callbacks while holding the lock. */
    cgcc = erts_alloc(ERTS_ALC_T_TMP,
                      (no_cpu_groups_callbacks
                       * sizeof(erts_cpu_groups_callback_call_t)));
    cgcc_ix = 0;
    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
        for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
            cgcc[cgcc_ix].callback = cgcl->callback;
            cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
            cgcc[cgcc_ix].arg = cgcl->arg;
            cgcc_ix++;
        }
    }

    ASSERT(no_cpu_groups_callbacks == cgcc_ix);
    erts_rwmtx_runlock(&cpuinfo_rwmtx);

    /* Invoke the snapshotted callbacks without holding the lock. */
    for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
        cgcc[cgcc_ix].callback(0,
                               esdp,
                               cgcc[cgcc_ix].ix,
                               cgcc[cgcc_ix].arg);

    erts_free(ERTS_ALC_T_TMP, cgcc);

    if (esdp->no <= max_main_threads)
        erts_thr_set_main_status(1, (int) esdp->no);
}
597
/*
 * Recompute scheduler2cpu_map[].bind_id from cpudata[0..size-1] and the
 * current cpu_bind_order.  Schedulers are numbered from 1; unavailable
 * cpus are skipped, and schedulers left over when cpus run out get
 * bind_id -1 (unbound).  Caller must hold cpuinfo_rwmtx rw-locked.
 * Note: cpudata is sorted (and its ids renumbered) in place.
 */
static void
write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
{
    int s_ix = 1;
    int cpu_ix;

    ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));

    if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {

        cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);

        for (cpu_ix = 0; cpu_ix < size && s_ix <= erts_no_schedulers; cpu_ix++)
            if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical))
                scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
    }

    /* Any schedulers without a cpu assigned are to be unbound. */
    if (s_ix <= erts_no_schedulers)
        for (; s_ix <= erts_no_schedulers; s_ix++)
            scheduler2cpu_map[s_ix].bind_id = -1;
}
619
620 int
erts_init_scheduler_bind_type_string(char * how)621 erts_init_scheduler_bind_type_string(char *how)
622 {
623 ErtsCpuBindOrder order;
624
625 if (sys_strcmp(how, "u") == 0)
626 order = ERTS_CPU_BIND_NONE;
627 else if (sys_strcmp(how, "db") == 0)
628 order = ERTS_CPU_BIND_DEFAULT_BIND;
629 else if (sys_strcmp(how, "s") == 0)
630 order = ERTS_CPU_BIND_SPREAD;
631 else if (sys_strcmp(how, "ps") == 0)
632 order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
633 else if (sys_strcmp(how, "ts") == 0)
634 order = ERTS_CPU_BIND_THREAD_SPREAD;
635 else if (sys_strcmp(how, "tnnps") == 0)
636 order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
637 else if (sys_strcmp(how, "nnps") == 0)
638 order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
639 else if (sys_strcmp(how, "nnts") == 0)
640 order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
641 else if (sys_strcmp(how, "ns") == 0)
642 order = ERTS_CPU_BIND_NO_SPREAD;
643 else
644 return ERTS_INIT_SCHED_BIND_TYPE_ERROR_BAD_TYPE;
645
646 if (order != ERTS_CPU_BIND_NONE) {
647 if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP)
648 return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
649 else if (!system_cpudata && !user_cpudata)
650 return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
651 }
652
653 cpu_bind_order = order;
654
655 return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
656 }
657
/*
 * Map a bind order to its atom representation (e.g. 'spread',
 * 'unbound').  Atoms are declared locally per case via ERTS_DECL_AM.
 * Returns THE_NON_VALUE for unknown orders (asserted unreachable).
 */
static Eterm
bound_schedulers_term(ErtsCpuBindOrder order)
{
    switch (order) {
    case ERTS_CPU_BIND_SPREAD: {
        ERTS_DECL_AM(spread);
        return AM_spread;
    }
    case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
        ERTS_DECL_AM(processor_spread);
        return AM_processor_spread;
    }
    case ERTS_CPU_BIND_THREAD_SPREAD: {
        ERTS_DECL_AM(thread_spread);
        return AM_thread_spread;
    }
    case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
        ERTS_DECL_AM(thread_no_node_processor_spread);
        return AM_thread_no_node_processor_spread;
    }
    case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
        ERTS_DECL_AM(no_node_processor_spread);
        return AM_no_node_processor_spread;
    }
    case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
        ERTS_DECL_AM(no_node_thread_spread);
        return AM_no_node_thread_spread;
    }
    case ERTS_CPU_BIND_NO_SPREAD: {
        ERTS_DECL_AM(no_spread);
        return AM_no_spread;
    }
    case ERTS_CPU_BIND_NONE: {
        ERTS_DECL_AM(unbound);
        return AM_unbound;
    }
    default:
        ASSERT(0);
        return THE_NON_VALUE;
    }
}
699
700 Eterm
erts_bound_schedulers_term(Process * c_p)701 erts_bound_schedulers_term(Process *c_p)
702 {
703 ErtsCpuBindOrder order;
704 erts_rwmtx_rlock(&cpuinfo_rwmtx);
705 order = cpu_bind_order;
706 erts_rwmtx_runlock(&cpuinfo_rwmtx);
707 return bound_schedulers_term(order);
708 }
709
/*
 * BIF backend: set the scheduler bind order from atom 'how' and
 * recompute scheduler bindings.  Returns the atom for the previous
 * bind order on success; badarg on an unknown atom or missing
 * topology; notsup when cpu binding is not supported (except that
 * 'unbound' with no binding active still succeeds).  Schedulers are
 * notified to re-check their bindings after the lock is released.
 */
Eterm
erts_bind_schedulers(Process *c_p, Eterm how)
{
    int notify = 0;
    Eterm res;
    erts_cpu_topology_t *cpudata;
    int cpudata_size;
    ErtsCpuBindOrder old_cpu_bind_order;

    erts_rwmtx_rwlock(&cpuinfo_rwmtx);

    if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) {
        /* Binding not supported; 'unbound' while already unbound is
         * still an allowed no-op. */
        if (cpu_bind_order == ERTS_CPU_BIND_NONE
            && ERTS_IS_ATOM_STR("unbound", how)) {
            res = bound_schedulers_term(ERTS_CPU_BIND_NONE);
            goto done;
        }
        ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
    }
    else {

        old_cpu_bind_order = cpu_bind_order;

        if (ERTS_IS_ATOM_STR("default_bind", how))
            cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
        else if (ERTS_IS_ATOM_STR("spread", how))
            cpu_bind_order = ERTS_CPU_BIND_SPREAD;
        else if (ERTS_IS_ATOM_STR("processor_spread", how))
            cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
        else if (ERTS_IS_ATOM_STR("thread_spread", how))
            cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
        else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
            cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
        else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
            cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
        else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
            cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
        else if (ERTS_IS_ATOM_STR("no_spread", how))
            cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
        else if (ERTS_IS_ATOM_STR("unbound", how))
            cpu_bind_order = ERTS_CPU_BIND_NONE;
        else {
            /* Unknown atom; restore the old order. */
            cpu_bind_order = old_cpu_bind_order;
            ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
            goto done;
        }

        create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);

        if (!cpudata) {
            /* No topology known; restore the old order. */
            cpu_bind_order = old_cpu_bind_order;
            ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
            goto done;
        }

        write_schedulers_bind_change(cpudata, cpudata_size);
        notify = 1;

        destroy_tmp_cpu_topology_copy(cpudata);

        res = bound_schedulers_term(old_cpu_bind_order);
    }

done:

    erts_rwmtx_rwunlock(&cpuinfo_rwmtx);

    /* Wake schedulers so they apply the new bindings (outside the lock). */
    if (notify)
        erts_sched_notify_check_cpu_bind();

    return res;
}
782
783 int
erts_sched_bind_atthrcreate_prepare(void)784 erts_sched_bind_atthrcreate_prepare(void)
785 {
786 ErtsSchedulerData *esdp = erts_get_scheduler_data();
787 return esdp != NULL && erts_is_scheduler_bound(esdp);
788 }
789
790 int
erts_sched_bind_atthrcreate_child(int unbind)791 erts_sched_bind_atthrcreate_child(int unbind)
792 {
793 int res = 0;
794 if (unbind) {
795 erts_rwmtx_rlock(&cpuinfo_rwmtx);
796 res = erts_unbind_from_cpu(cpuinfo);
797 erts_rwmtx_runlock(&cpuinfo_rwmtx);
798 }
799 return res;
800 }
801
/*
 * Thread-creation hook (parent side, after create).  Intentionally a
 * no-op: unlike the atfork hooks, no lock is held across thread
 * creation, so there is nothing for the parent to release.
 */
void
erts_sched_bind_atthrcreate_parent(int unbind)
{

}
807
808 int
erts_sched_bind_atfork_prepare(void)809 erts_sched_bind_atfork_prepare(void)
810 {
811 ErtsSchedulerData *esdp = erts_get_scheduler_data();
812 int unbind = esdp != NULL && erts_is_scheduler_bound(esdp);
813 if (unbind)
814 erts_rwmtx_rlock(&cpuinfo_rwmtx);
815 return unbind;
816 }
817
818 int
erts_sched_bind_atfork_child(int unbind)819 erts_sched_bind_atfork_child(int unbind)
820 {
821 if (unbind) {
822 ERTS_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
823 || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
824 return erts_unbind_from_cpu(cpuinfo);
825 }
826 return 0;
827 }
828
829 void
erts_sched_bind_atfork_parent(int unbind)830 erts_sched_bind_atfork_parent(int unbind)
831 {
832 if (unbind)
833 erts_rwmtx_runlock(&cpuinfo_rwmtx);
834 }
835
/*
 * Debug/introspection BIF backend: compute, without actually binding
 * anything, which logical cpu each scheduler WOULD be bound to under
 * bind order 'how'.  Returns a tuple of logical cpu ids (one per
 * topology entry), 'false' when no topology is known or 'how' is
 * 'unbound', or badarg for an unknown atom.
 */
Eterm
erts_fake_scheduler_bindings(Process *p, Eterm how)
{
    ErtsCpuBindOrder fake_cpu_bind_order;
    erts_cpu_topology_t *cpudata;
    int cpudata_size;
    Eterm res;

    if (ERTS_IS_ATOM_STR("default_bind", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
    else if (ERTS_IS_ATOM_STR("spread", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
    else if (ERTS_IS_ATOM_STR("processor_spread", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
    else if (ERTS_IS_ATOM_STR("thread_spread", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
    else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
    else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
    else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
    else if (ERTS_IS_ATOM_STR("no_spread", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
    else if (ERTS_IS_ATOM_STR("unbound", how))
        fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
    else {
        ERTS_BIF_PREP_ERROR(res, p, BADARG);
        return res;
    }

    /* Work on a private copy of the topology; sorting below mutates it. */
    erts_rwmtx_rlock(&cpuinfo_rwmtx);
    create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
    erts_rwmtx_runlock(&cpuinfo_rwmtx);

    if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
        ERTS_BIF_PREP_RET(res, am_false);
    else {
        int i;
        Eterm *hp;

        cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);

#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
        /* Debug dump of the sorted topology (compile-time enabled). */

        erts_fprintf(stderr, "node:          ");
        for (i = 0; i < cpudata_size; i++)
            erts_fprintf(stderr, " %2d", cpudata[i].node);
        erts_fprintf(stderr, "\n");
        erts_fprintf(stderr, "processor:     ");
        for (i = 0; i < cpudata_size; i++)
            erts_fprintf(stderr, " %2d", cpudata[i].processor);
        erts_fprintf(stderr, "\n");
        if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
            && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
            && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
            erts_fprintf(stderr, "processor_node:");
            for (i = 0; i < cpudata_size; i++)
                erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
            erts_fprintf(stderr, "\n");
        }
        erts_fprintf(stderr, "core:          ");
        for (i = 0; i < cpudata_size; i++)
            erts_fprintf(stderr, " %2d", cpudata[i].core);
        erts_fprintf(stderr, "\n");
        erts_fprintf(stderr, "thread:        ");
        for (i = 0; i < cpudata_size; i++)
            erts_fprintf(stderr, " %2d", cpudata[i].thread);
        erts_fprintf(stderr, "\n");
        erts_fprintf(stderr, "logical:       ");
        for (i = 0; i < cpudata_size; i++)
            erts_fprintf(stderr, " %2d", cpudata[i].logical);
        erts_fprintf(stderr, "\n");
#endif

        /* Build the result tuple on the process heap. */
        hp = HAlloc(p, cpudata_size+1);
        ERTS_BIF_PREP_RET(res, make_tuple(hp));
        *hp++ = make_arityval((Uint) cpudata_size);
        for (i = 0; i < cpudata_size; i++)
            *hp++ = make_small((Uint) cpudata[i].logical);
    }

    destroy_tmp_cpu_topology_copy(cpudata);

    return res;
}
922
/*
 * Return a tuple, indexed by scheduler number, with each scheduler's
 * currently bound logical cpu id, or the atom 'unbound'.
 */
Eterm
erts_get_schedulers_binds(Process *c_p)
{
    int ix;
    ERTS_DECL_AM(unbound);
    Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
    Eterm res = make_tuple(hp);

    *(hp++) = make_arityval(erts_no_schedulers);
    erts_rwmtx_rlock(&cpuinfo_rwmtx);
    for (ix = 1; ix <= erts_no_schedulers; ix++)
        *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
                   ? make_small(scheduler2cpu_map[ix].bound_id)
                   : AM_unbound);
    erts_rwmtx_runlock(&cpuinfo_rwmtx);
    return res;
}
940
941 /*
942 * CPU topology
943 */
944
/* Growable sequence of integer ids parsed from a topology string. */
typedef struct {
    int *id;     /* allocated id array (ERTS_ALC_T_TMP_CPU_IDS) */
    int used;    /* number of valid entries */
    int size;    /* allocated capacity */
} ErtsCpuTopIdSeq;

/* One parsed topology-string entry: the id sequence for each level. */
typedef struct {
    ErtsCpuTopIdSeq logical;
    ErtsCpuTopIdSeq thread;
    ErtsCpuTopIdSeq core;
    ErtsCpuTopIdSeq processor_node;
    ErtsCpuTopIdSeq processor;
    ErtsCpuTopIdSeq node;
} ErtsCpuTopEntry;
959
960 static void
init_cpu_top_entry(ErtsCpuTopEntry * cte)961 init_cpu_top_entry(ErtsCpuTopEntry *cte)
962 {
963 int size = 10;
964 cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
965 sizeof(int)*size);
966 cte->logical.size = size;
967 cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
968 sizeof(int)*size);
969 cte->thread.size = size;
970 cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
971 sizeof(int)*size);
972 cte->core.size = size;
973 cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
974 sizeof(int)*size);
975 cte->processor_node.size = size;
976 cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
977 sizeof(int)*size);
978 cte->processor.size = size;
979 cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
980 sizeof(int)*size);
981 cte->node.size = size;
982 }
983
984 static void
destroy_cpu_top_entry(ErtsCpuTopEntry * cte)985 destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
986 {
987 erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
988 erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
989 erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
990 erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
991 erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
992 erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
993 }
994
995 static int
get_cput_value_or_range(int * v,int * vr,char ** str)996 get_cput_value_or_range(int *v, int *vr, char **str)
997 {
998 long l;
999 char *c = *str;
1000 errno = 0;
1001 if (!isdigit((unsigned char)*c))
1002 return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
1003 l = strtol(c, &c, 10);
1004 if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
1005 return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
1006 *v = (int) l;
1007 if (*c == '-') {
1008 c++;
1009 if (!isdigit((unsigned char)*c))
1010 return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1011 l = strtol(c, &c, 10);
1012 if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
1013 return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
1014 *vr = (int) l;
1015 }
1016 *str = c;
1017 return ERTS_INIT_CPU_TOPOLOGY_OK;
1018 }
1019
/*
 * Parse a comma-separated list of ids and id ranges (e.g. "0-3,8,15-12")
 * into idseq, growing idseq->id as needed.  Ranges expand in the given
 * direction, so "3-1" yields 3,2,1.  On success idseq->used is set and
 * *str is advanced to the first character after the list; otherwise the
 * error code from get_cput_value_or_range() is returned.
 */
static int
get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
{
    int ix = 0;
    int need_size = 0;
    char *c = *str;

    while (1) {
        int res;
        int val;
        int nids;
        int val_range = -1;   /* < 0 means "no range given" */
        res = get_cput_value_or_range(&val, &val_range, &c);
        if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
            return res;
        if (val_range < 0 || val_range == val)
            nids = 1;
        else {
            if (val_range > val)
                nids = val_range - val + 1;
            else
                nids = val - val_range + 1;
        }
        /* Grow the id array (with some slack) if this item overflows it. */
        need_size += nids;
        if (need_size > idseq->size) {
            idseq->size = need_size + 10;
            idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
                                     idseq->id,
                                     sizeof(int)*idseq->size);
        }
        if (nids == 1)
            idseq->id[ix++] = val;
        else if (val_range > val) {
            /* Ascending range. */
            for (; val <= val_range; val++)
                idseq->id[ix++] = val;
        }
        else {
            /* Descending range. */
            for (; val >= val_range; val--)
                idseq->id[ix++] = val;
        }
        if (*c != ',')
            break;
        c++;
    }
    *str = c;
    idseq->used = ix;
    return ERTS_INIT_CPU_TOPOLOGY_OK;
}
1068
/*
 * Parse one topology entry (the part of the string up to the next
 * ':' or the end) into cput. An entry is a series of level letters,
 * each followed by an id sequence: L (logical, mandatory),
 * t/T (thread), c/C (core), p/P (processor), n/N (node or, when it
 * appears below a processor level, processor-internal node). Levels
 * must appear in strictly descending hierarchy order. All non-logical
 * levels default to a single id (0, or -1 for node levels). On
 * success *str is advanced past the entry (and a trailing ':').
 */
static int
get_cput_entry(ErtsCpuTopEntry *cput, char **str)
{
    int h;
    char *c = *str;

    /* Defaults used when a level is not mentioned in the entry. */
    cput->logical.used = 0;
    cput->thread.id[0] = 0;
    cput->thread.used = 1;
    cput->core.id[0] = 0;
    cput->core.used = 1;
    cput->processor_node.id[0] = -1;
    cput->processor_node.used = 1;
    cput->processor.id[0] = 0;
    cput->processor.used = 1;
    cput->node.id[0] = -1;
    cput->node.used = 1;

    /* h tracks the most recent level seen; each new level must be
     * strictly higher in the hierarchy (lower constant value). */
    h = ERTS_TOPOLOGY_MAX_DEPTH;
    while (*c != ':' && *c != '\0') {
        int res;
        ErtsCpuTopIdSeq *idseqp;
        switch (*c++) {
        case 'L':
            if (h <= ERTS_TOPOLOGY_LOGICAL)
                return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
            idseqp = &cput->logical;
            h = ERTS_TOPOLOGY_LOGICAL;
            break;
        case 't':
        case 'T':
            if (h <= ERTS_TOPOLOGY_THREAD)
                return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
            idseqp = &cput->thread;
            h = ERTS_TOPOLOGY_THREAD;
            break;
        case 'c':
        case 'C':
            if (h <= ERTS_TOPOLOGY_CORE)
                return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
            idseqp = &cput->core;
            h = ERTS_TOPOLOGY_CORE;
            break;
        case 'p':
        case 'P':
            if (h <= ERTS_TOPOLOGY_PROCESSOR)
                return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
            idseqp = &cput->processor;
            h = ERTS_TOPOLOGY_PROCESSOR;
            break;
        case 'n':
        case 'N':
            /* 'n' is ambiguous: a NUMA node above the processor
             * level, or a processor-internal node below it. If a
             * processor level has not been seen yet, look ahead for
             * a later 'p'/'P' in this entry to decide. */
            if (h <= ERTS_TOPOLOGY_PROCESSOR) {
            do_node:
                if (h <= ERTS_TOPOLOGY_NODE)
                    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
                idseqp = &cput->node;
                h = ERTS_TOPOLOGY_NODE;
            }
            else {
                int p_node = 0;
                char *p_chk = c;
                while (*p_chk != '\0' && *p_chk != ':') {
                    if (*p_chk == 'p' || *p_chk == 'P') {
                        p_node = 1;
                        break;
                    }
                    p_chk++;
                }
                if (!p_node)
                    goto do_node; /* no processor follows: plain node */
                if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
                    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
                idseqp = &cput->processor_node;
                h = ERTS_TOPOLOGY_PROCESSOR_NODE;
            }
            break;
        default:
            return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
        }
        res = get_cput_id_seq(idseqp, &c);
        if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
            return res;
    }

    /* A logical id sequence is mandatory. */
    if (cput->logical.used < 1)
        return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;

    if (*c == ':') {
        c++;
    }

    /* Every other level must contribute either a single id (applied
     * to all logicals) or exactly one id per logical id. */
    if (cput->thread.used != 1
        && cput->thread.used != cput->logical.used)
        return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
    if (cput->core.used != 1
        && cput->core.used != cput->logical.used)
        return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
    if (cput->processor_node.used != 1
        && cput->processor_node.used != cput->logical.used)
        return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
    if (cput->processor.used != 1
        && cput->processor.used != cput->logical.used)
        return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
    if (cput->node.used != 1
        && cput->node.used != cput->logical.used)
        return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;

    *str = c;
    return ERTS_INIT_CPU_TOPOLOGY_OK;
}
1180
/*
 * Validate a topology: logical ids must be unique, every
 * (processor, node, processor_node, core, thread) tuple must be
 * unique, and NUMA node info must be consistent (either no node info
 * at all, or exactly one of node/processor_node per entry, with a
 * processor never spanning nodes). Note: sorts cpudata in processor
 * order as a side effect. An empty topology (size <= 0) is accepted.
 */
static int
verify_topology(erts_cpu_topology_t *cpudata, int size)
{
    if (size > 0) {
        int *logical;
        int node, processor, no_nodes, i;

        /* Verify logical ids */
        logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);

        for (i = 0; i < size; i++)
            logical[i] = cpudata[i].logical;

        qsort(logical, size, sizeof(int), int_cmp);
        for (i = 0; i < size-1; i++) {
            if (logical[i] == logical[i+1]) {
                erts_free(ERTS_ALC_T_TMP, logical);
                return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
            }
        }

        erts_free(ERTS_ALC_T_TMP, logical);

        qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);

        /* Verify unique entities */

        for (i = 1; i < size; i++) {
            if (cpudata[i-1].processor == cpudata[i].processor
                && cpudata[i-1].node == cpudata[i].node
                && (cpudata[i-1].processor_node
                    == cpudata[i].processor_node)
                && cpudata[i-1].core == cpudata[i].core
                && cpudata[i-1].thread == cpudata[i].thread) {
                return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
            }
        }

        /* Verify numa nodes: either the whole topology lacks node
         * info, or every entry has exactly one of node/processor_node
         * set (>= 0), and entries of one processor agree on node. */
        node = cpudata[0].node;
        processor = cpudata[0].processor;
        no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
        for (i = 1; i < size; i++) {
            if (no_nodes) {
                if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
                    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
            }
            else {
                if (cpudata[i].processor == processor && cpudata[i].node != node)
                    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
                node = cpudata[i].node;
                processor = cpudata[i].processor;
                if (node >= 0 && cpudata[i].processor_node >= 0)
                    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
                if (node < 0 && cpudata[i].processor_node < 0)
                    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
            }
        }
    }

    return ERTS_INIT_CPU_TOPOLOGY_OK;
}
1243
1244 int
erts_init_cpu_topology_string(char * topology_str)1245 erts_init_cpu_topology_string(char *topology_str)
1246 {
1247 ErtsCpuTopEntry cput;
1248 int need_size;
1249 char *c;
1250 int ix;
1251 int error = ERTS_INIT_CPU_TOPOLOGY_OK;
1252
1253 if (user_cpudata)
1254 erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
1255 user_cpudata_size = 10;
1256
1257 user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
1258 (sizeof(erts_cpu_topology_t)
1259 * user_cpudata_size));
1260
1261 init_cpu_top_entry(&cput);
1262
1263 ix = 0;
1264 need_size = 0;
1265
1266 c = topology_str;
1267 if (*c == '\0') {
1268 error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
1269 goto fail;
1270 }
1271 do {
1272 int r;
1273 error = get_cput_entry(&cput, &c);
1274 if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
1275 goto fail;
1276 need_size += cput.logical.used;
1277 if (user_cpudata_size < need_size) {
1278 user_cpudata_size = need_size + 10;
1279 user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
1280 user_cpudata,
1281 (sizeof(erts_cpu_topology_t)
1282 * user_cpudata_size));
1283 }
1284
1285 ASSERT(cput.thread.used == 1
1286 || cput.thread.used == cput.logical.used);
1287 ASSERT(cput.core.used == 1
1288 || cput.core.used == cput.logical.used);
1289 ASSERT(cput.processor_node.used == 1
1290 || cput.processor_node.used == cput.logical.used);
1291 ASSERT(cput.processor.used == 1
1292 || cput.processor.used == cput.logical.used);
1293 ASSERT(cput.node.used == 1
1294 || cput.node.used == cput.logical.used);
1295
1296 for (r = 0; r < cput.logical.used; r++) {
1297 user_cpudata[ix].logical = cput.logical.id[r];
1298 user_cpudata[ix].thread =
1299 cput.thread.id[cput.thread.used == 1 ? 0 : r];
1300 user_cpudata[ix].core =
1301 cput.core.id[cput.core.used == 1 ? 0 : r];
1302 user_cpudata[ix].processor_node =
1303 cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
1304 user_cpudata[ix].processor =
1305 cput.processor.id[cput.processor.used == 1 ? 0 : r];
1306 user_cpudata[ix].node =
1307 cput.node.id[cput.node.used == 1 ? 0 : r];
1308 ix++;
1309 }
1310 } while (*c != '\0');
1311
1312 if (user_cpudata_size != ix) {
1313 user_cpudata_size = ix;
1314 user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
1315 user_cpudata,
1316 (sizeof(erts_cpu_topology_t)
1317 * user_cpudata_size));
1318 }
1319
1320 error = verify_topology(user_cpudata, user_cpudata_size);
1321 if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
1322 destroy_cpu_top_entry(&cput);
1323 return ERTS_INIT_CPU_TOPOLOGY_OK;
1324 }
1325
1326 fail:
1327 if (user_cpudata)
1328 erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
1329 user_cpudata_size = 0;
1330 destroy_cpu_top_entry(&cput);
1331 return error;
1332 }
1333
1334 #define ERTS_GET_CPU_TOPOLOGY_ERROR -1
1335 #define ERTS_GET_USED_CPU_TOPOLOGY 0
1336 #define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
1337 #define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
1338
1339 static Eterm get_cpu_topology_term(Process *c_p, int type);
1340
/*
 * BIF support: install a new user defined CPU topology (a list of
 * {cpu, Node, Processor, ProcessorNode, Core, Thread, Logical}
 * tuples), or drop it when 'term' is the atom 'undefined'. Returns
 * the previously used topology term on success, THE_NON_VALUE on a
 * malformed or invalid argument. Takes cpuinfo_rwmtx in write mode
 * and, on success, triggers a scheduler re-bind.
 */
Eterm
erts_set_cpu_topology(Process *c_p, Eterm term)
{
    erts_cpu_topology_t *cpudata = NULL;
    int cpudata_size = 0;
    Eterm res;

    erts_rwmtx_rwlock(&cpuinfo_rwmtx);
    /* Capture the old "used" topology up front; it is the success
     * return value. */
    res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
    if (term == am_undefined) {
        /* Revert to the system detected topology. */
        if (user_cpudata)
            erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
        user_cpudata = NULL;
        user_cpudata_size = 0;

        if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
            cpudata_size = system_cpudata_size;
            cpudata = erts_alloc(ERTS_ALC_T_TMP,
                                 (sizeof(erts_cpu_topology_t)
                                  * cpudata_size));

            sys_memcpy((void *) cpudata,
                       (void *) system_cpudata,
                       sizeof(erts_cpu_topology_t)*cpudata_size);
        }
    }
    else if (is_not_list(term)) {
    error:
        /* All validation failures funnel here: unlock, signal badarg
         * via THE_NON_VALUE, and fall through to cleanup. */
        erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
        res = THE_NON_VALUE;
        goto done;
    }
    else {
        Eterm list = term;
        int ix = 0;

        cpudata_size = 100;
        cpudata = erts_alloc(ERTS_ALC_T_TMP,
                             (sizeof(erts_cpu_topology_t)
                              * cpudata_size));

        while (is_list(list)) {
            Eterm *lp = list_val(list);
            Eterm cpu = CAR(lp);
            Eterm* tp;
            Sint id;

            if (is_not_tuple(cpu))
                goto error;

            tp = tuple_val(cpu);

            if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
                goto error;

            if (ix >= cpudata_size) {
                cpudata_size += 100;
                cpudata = erts_realloc(ERTS_ALC_T_TMP,
                                       cpudata,
                                       (sizeof(erts_cpu_topology_t)
                                        * cpudata_size));
            }

            /* Each id must be in [-1, ERTS_MAX_CPU_TOPOLOGY_ID];
             * -1 means "no such level". */
            id = signed_val(tp[2]);
            if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
                goto error;
            cpudata[ix].node = (int) id;

            id = signed_val(tp[3]);
            if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
                goto error;
            cpudata[ix].processor = (int) id;

            id = signed_val(tp[4]);
            if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
                goto error;
            cpudata[ix].processor_node = (int) id;

            id = signed_val(tp[5]);
            if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
                goto error;
            cpudata[ix].core = (int) id;

            id = signed_val(tp[6]);
            if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
                goto error;
            cpudata[ix].thread = (int) id;

            id = signed_val(tp[7]);
            if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
                goto error;
            cpudata[ix].logical = (int) id;

            list = CDR(lp);
            ix++;
        }

        /* Reject improper lists. */
        if (is_not_nil(list))
            goto error;

        cpudata_size = ix;

        if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
            goto error;

        /* Install the validated topology as the user topology. */
        if (user_cpudata_size != cpudata_size) {
            if (user_cpudata)
                erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
            user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
                                      sizeof(erts_cpu_topology_t)*cpudata_size);
            user_cpudata_size = cpudata_size;
        }

        sys_memcpy((void *) user_cpudata,
                   (void *) cpudata,
                   sizeof(erts_cpu_topology_t)*cpudata_size);
    }

    update_cpu_groups_maps();

    write_schedulers_bind_change(cpudata, cpudata_size);

    erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
    erts_sched_notify_check_cpu_bind();

 done:

    if (cpudata)
        erts_free(ERTS_ALC_T_TMP, cpudata);

    return res;
}
1473
1474 static void
create_tmp_cpu_topology_copy(erts_cpu_topology_t ** cpudata,int * cpudata_size)1475 create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
1476 {
1477 if (user_cpudata) {
1478 *cpudata_size = user_cpudata_size;
1479 *cpudata = erts_alloc(ERTS_ALC_T_TMP,
1480 (sizeof(erts_cpu_topology_t)
1481 * (*cpudata_size)));
1482 sys_memcpy((void *) *cpudata,
1483 (void *) user_cpudata,
1484 sizeof(erts_cpu_topology_t)*(*cpudata_size));
1485 }
1486 else if (system_cpudata) {
1487 *cpudata_size = system_cpudata_size;
1488 *cpudata = erts_alloc(ERTS_ALC_T_TMP,
1489 (sizeof(erts_cpu_topology_t)
1490 * (*cpudata_size)));
1491 sys_memcpy((void *) *cpudata,
1492 (void *) system_cpudata,
1493 sizeof(erts_cpu_topology_t)*(*cpudata_size));
1494 }
1495 else {
1496 *cpudata = NULL;
1497 *cpudata_size = 0;
1498 }
1499 }
1500
1501 static void
destroy_tmp_cpu_topology_copy(erts_cpu_topology_t * cpudata)1502 destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
1503 {
1504 if (cpudata)
1505 erts_free(ERTS_ALC_T_TMP, cpudata);
1506 }
1507
1508
1509 static Eterm
bld_topology_term(Eterm ** hpp,Uint * hszp,erts_cpu_topology_t * cpudata,int size)1510 bld_topology_term(Eterm **hpp,
1511 Uint *hszp,
1512 erts_cpu_topology_t *cpudata,
1513 int size)
1514 {
1515 Eterm res = NIL;
1516 int i;
1517
1518 if (size == 0)
1519 return am_undefined;
1520
1521 for (i = size-1; i >= 0; i--) {
1522 res = erts_bld_cons(hpp,
1523 hszp,
1524 erts_bld_tuple(hpp,
1525 hszp,
1526 7,
1527 am_cpu,
1528 make_small(cpudata[i].node),
1529 make_small(cpudata[i].processor),
1530 make_small(cpudata[i].processor_node),
1531 make_small(cpudata[i].core),
1532 make_small(cpudata[i].thread),
1533 make_small(cpudata[i].logical)),
1534 res);
1535 }
1536 return res;
1537 }
1538
/*
 * Build the term for one of the three topology views: 'used' (user
 * topology if any, else detected), 'detected' (system probed), or
 * 'defined' (user supplied only). Returns am_undefined when the
 * requested view is empty. Caller holds cpuinfo_rwmtx (read or
 * write); heap space is allocated on c_p.
 */
static Eterm
get_cpu_topology_term(Process *c_p, int type)
{
#ifdef DEBUG
    Eterm *hp_end;
#endif
    Eterm *hp;
    Uint hsz;
    Eterm res = THE_NON_VALUE;
    erts_cpu_topology_t *cpudata = NULL;
    int size = 0;

    switch (type) {
    case ERTS_GET_USED_CPU_TOPOLOGY:
        /* "used" = user topology when defined, detected otherwise. */
        if (user_cpudata)
            goto defined;
        else
            goto detected;
    case ERTS_GET_DETECTED_CPU_TOPOLOGY:
    detected:
        if (!system_cpudata)
            res = am_undefined;
        else {
            /* Copy so we can hand a stable snapshot to the term
             * builder. */
            size = system_cpudata_size;
            cpudata = erts_alloc(ERTS_ALC_T_TMP,
                                 (sizeof(erts_cpu_topology_t)
                                  * size));
            sys_memcpy((void *) cpudata,
                       (void *) system_cpudata,
                       sizeof(erts_cpu_topology_t)*size);
        }
        break;
    case ERTS_GET_DEFINED_CPU_TOPOLOGY:
    defined:
        if (!user_cpudata)
            res = am_undefined;
        else {
            /* No copy needed; user_cpudata is used directly and the
             * free below is skipped for it. */
            size = user_cpudata_size;
            cpudata = user_cpudata;
        }
        break;
    default:
        erts_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
        break;
    }

    if (res == am_undefined) {
        ASSERT(!cpudata);
        return res;
    }

    /* Two pass build: first compute the heap size, then build. */
    hsz = 0;

    bld_topology_term(NULL, &hsz,
                      cpudata, size);

    hp = HAlloc(c_p, hsz);

#ifdef DEBUG
    hp_end = hp + hsz;
#endif

    res = bld_topology_term(&hp, NULL,
                            cpudata, size);

    ASSERT(hp_end == hp);

    /* Only free if we made a temporary copy above. */
    if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
        erts_free(ERTS_ALC_T_TMP, cpudata);

    return res;
}
1611
1612 Eterm
erts_get_cpu_topology_term(Process * c_p,Eterm which)1613 erts_get_cpu_topology_term(Process *c_p, Eterm which)
1614 {
1615 Eterm res;
1616 int type;
1617 erts_rwmtx_rlock(&cpuinfo_rwmtx);
1618 if (ERTS_IS_ATOM_STR("used", which))
1619 type = ERTS_GET_USED_CPU_TOPOLOGY;
1620 else if (ERTS_IS_ATOM_STR("detected", which))
1621 type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
1622 else if (ERTS_IS_ATOM_STR("defined", which))
1623 type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
1624 else
1625 type = ERTS_GET_CPU_TOPOLOGY_ERROR;
1626 if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
1627 res = THE_NON_VALUE;
1628 else
1629 res = get_cpu_topology_term(c_p, type);
1630 erts_rwmtx_runlock(&cpuinfo_rwmtx);
1631 return res;
1632 }
1633
1634 static void
get_logical_processors(int * conf,int * onln,int * avail,int * quota)1635 get_logical_processors(int *conf, int *onln, int *avail, int *quota)
1636 {
1637 if (conf)
1638 *conf = erts_get_cpu_configured(cpuinfo);
1639 if (onln)
1640 *onln = erts_get_cpu_online(cpuinfo);
1641 if (avail)
1642 *avail = erts_get_cpu_available(cpuinfo);
1643 if (quota)
1644 *quota = erts_get_cpu_quota(cpuinfo);
1645 }
1646
1647 void
erts_get_logical_processors(int * conf,int * onln,int * avail,int * quota)1648 erts_get_logical_processors(int *conf, int *onln, int *avail, int *quota)
1649 {
1650 erts_rwmtx_rlock(&cpuinfo_rwmtx);
1651 get_logical_processors(conf, onln, avail, quota);
1652 erts_rwmtx_runlock(&cpuinfo_rwmtx);
1653 }
1654
1655 void
erts_pre_early_init_cpu_topology(int * max_dcg_p,int * max_rg_p,int * conf_p,int * onln_p,int * avail_p,int * quota_p)1656 erts_pre_early_init_cpu_topology(int *max_dcg_p,
1657 int *max_rg_p,
1658 int *conf_p,
1659 int *onln_p,
1660 int *avail_p,
1661 int *quota_p)
1662 {
1663 cpu_groups_maps = NULL;
1664 no_cpu_groups_callbacks = 0;
1665 *max_rg_p = ERTS_MAX_READER_GROUPS;
1666 *max_dcg_p = ERTS_MAX_FLXCTR_GROUPS;
1667 cpuinfo = erts_cpu_info_create();
1668 get_logical_processors(conf_p, onln_p, avail_p, quota_p);
1669 }
1670
/*
 * Second stage of CPU topology initialization. Probes the system
 * topology (discarding it if it fails verification), derives the
 * number of "main" threads, and from that the number of reader
 * groups and decentralized (flxctr) counter groups, each clamped to
 * its caller supplied maximum.
 */
void
erts_early_init_cpu_topology(int no_schedulers,
                             int *max_main_threads_p,
                             int max_reader_groups,
                             int *reader_groups_p,
                             int max_decentralized_counter_groups,
                             int *decentralized_counter_groups_p)
{
    user_cpudata = NULL;
    user_cpudata_size = 0;

    system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
    system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
                                (sizeof(erts_cpu_topology_t)
                                 * system_cpudata_size));

    cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;

    /* A topology that cannot be read or does not verify is treated
     * as "no detected topology". */
    if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
        || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
                                                        system_cpudata_size)) {
        erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
        system_cpudata = NULL;
        system_cpudata_size = 0;
    }

    /* Cap main threads by both the configured CPU count and the
     * scheduler count (a negative configured count means unknown). */
    max_main_threads = erts_get_cpu_configured(cpuinfo);
    if (max_main_threads > no_schedulers || max_main_threads < 0)
        max_main_threads = no_schedulers;
    *max_main_threads_p = max_main_threads;

    /* Counter groups degenerate to 1; reader groups degenerate to 0
     * (note the asymmetry between the two). */
    decentralized_counter_groups = max_main_threads;
    if (decentralized_counter_groups <= 1 || max_decentralized_counter_groups <= 1)
        decentralized_counter_groups = 1;
    if (decentralized_counter_groups > max_decentralized_counter_groups)
        decentralized_counter_groups = max_decentralized_counter_groups;
    *decentralized_counter_groups_p = decentralized_counter_groups;
    reader_groups = max_main_threads;
    if (reader_groups <= 1 || max_reader_groups <= 1)
        reader_groups = 0;
    if (reader_groups > max_reader_groups)
        reader_groups = max_reader_groups;
    *reader_groups_p = reader_groups;
}
1715
/*
 * Final stage of CPU topology initialization. Sets up the cpuinfo
 * lock and the scheduler-to-CPU bind map, registers the reader and
 * flxctr cpu group maps, and — when a bind order was requested on
 * the command line — computes the initial bindings and notifies the
 * schedulers.
 */
void
erts_init_cpu_topology(void)
{
    int ix;

    erts_rwmtx_init(&cpuinfo_rwmtx, "cpu_info", NIL,
        ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
    erts_rwmtx_rwlock(&cpuinfo_rwmtx);

    /* Scheduler ids are 1-based; slot 0 is unused. -1 = unbound. */
    scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
                                   (sizeof(ErtsCpuBindData)
                                    * (erts_no_schedulers+1)));
    for (ix = 1; ix <= erts_no_schedulers; ix++) {
        scheduler2cpu_map[ix].bind_id = -1;
        scheduler2cpu_map[ix].bound_id = -1;
    }

    if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED)
        cpu_bind_order = ERTS_CPU_BIND_NONE;

    reader_groups_map = add_cpu_groups(reader_groups,
                                       reader_groups_callback,
                                       NULL);
    decentralized_counter_groups_map = add_cpu_groups(decentralized_counter_groups,
                                                      flxctr_groups_callback,
                                                      NULL);

    /* Both branches release cpuinfo_rwmtx; the binding branch must
     * drop it before notifying schedulers. */
    if (cpu_bind_order == ERTS_CPU_BIND_NONE)
        erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
    else {
        erts_cpu_topology_t *cpudata;
        int cpudata_size;
        create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
        write_schedulers_bind_change(cpudata, cpudata_size);
        erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
        erts_sched_notify_check_cpu_bind();
        destroy_tmp_cpu_topology_copy(cpudata);
    }
}
1755
/*
 * Re-probe the CPU information (e.g. after CPUs come online or go
 * offline). When something changed, re-read and re-verify the system
 * topology, rebuild the cpu group maps and scheduler bindings, and
 * tell the schedulers to re-check their binds. Returns non-zero iff
 * the CPU information changed.
 */
int
erts_update_cpu_info(void)
{
    int changed;
    erts_rwmtx_rwlock(&cpuinfo_rwmtx);
    changed = erts_cpu_info_update(cpuinfo);
    if (changed) {
        erts_cpu_topology_t *cpudata;
        int cpudata_size;

        if (system_cpudata)
            erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);

        system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
        if (!system_cpudata_size)
            system_cpudata = NULL;
        else {
            system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
                                        (sizeof(erts_cpu_topology_t)
                                         * system_cpudata_size));

            /* A topology that cannot be read or does not verify is
             * treated as "no detected topology". */
            if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
                || (ERTS_INIT_CPU_TOPOLOGY_OK
                    != verify_topology(system_cpudata,
                                       system_cpudata_size))) {
                erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
                system_cpudata = NULL;
                system_cpudata_size = 0;
            }
        }

        update_cpu_groups_maps();

        create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
        write_schedulers_bind_change(cpudata, cpudata_size);
        destroy_tmp_cpu_topology_copy(cpudata);
    }
    erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
    /* Notify outside the lock. */
    if (changed)
        erts_sched_notify_check_cpu_bind();
    return changed;
}
1798
1799 /*
1800 * reader groups map
1801 */
1802
1803 void
reader_groups_callback(int suspending,ErtsSchedulerData * esdp,int group,void * unused)1804 reader_groups_callback(int suspending,
1805 ErtsSchedulerData *esdp,
1806 int group,
1807 void *unused)
1808 {
1809 if (reader_groups && esdp->no <= max_main_threads)
1810 erts_rwmtx_set_reader_group(suspending ? 0 : group+1);
1811 }
1812
1813 void
flxctr_groups_callback(int suspending,ErtsSchedulerData * esdp,int group,void * unused)1814 flxctr_groups_callback(int suspending,
1815 ErtsSchedulerData *esdp,
1816 int group,
1817 void *unused)
1818 {
1819 erts_flxctr_set_slot(suspending ? 0 : group+1);
1820 }
1821
1822 static Eterm get_cpu_groups_map(Process *c_p,
1823 erts_cpu_groups_map_t *map,
1824 int offset);
1825 Eterm
erts_debug_reader_groups_map(Process * c_p,int groups)1826 erts_debug_reader_groups_map(Process *c_p, int groups)
1827 {
1828 Eterm res;
1829 erts_cpu_groups_map_t test;
1830
1831 test.array = NULL;
1832 test.groups = groups;
1833 make_cpu_groups_map(&test, 1);
1834 if (!test.array)
1835 res = NIL;
1836 else {
1837 res = get_cpu_groups_map(c_p, &test, 1);
1838 erts_free(ERTS_ALC_T_TMP, test.array);
1839 }
1840 return res;
1841 }
1842
1843
1844 Eterm
erts_get_reader_groups_map(Process * c_p)1845 erts_get_reader_groups_map(Process *c_p)
1846 {
1847 Eterm res;
1848 erts_rwmtx_rlock(&cpuinfo_rwmtx);
1849 res = get_cpu_groups_map(c_p, reader_groups_map, 1);
1850 erts_rwmtx_runlock(&cpuinfo_rwmtx);
1851 return res;
1852 }
1853
1854 Eterm
erts_get_decentralized_counter_groups_map(Process * c_p)1855 erts_get_decentralized_counter_groups_map(Process *c_p)
1856 {
1857 Eterm res;
1858 erts_rwmtx_rlock(&cpuinfo_rwmtx);
1859 res = get_cpu_groups_map(c_p, decentralized_counter_groups_map, 1);
1860 erts_rwmtx_runlock(&cpuinfo_rwmtx);
1861 return res;
1862 }
1863
1864 /*
1865 * CPU groups
1866 */
1867
/*
 * Build an Erlang list of {Logical, CpuGroup+offset} tuples from a
 * cpu groups map. The map's array is an open addressed hash table;
 * entries with logical < 0 are empty and skipped. Heap space is
 * allocated on c_p: exactly 2+3 words (cons + 2-tuple) per mapped
 * logical processor.
 */
static Eterm
get_cpu_groups_map(Process *c_p,
                   erts_cpu_groups_map_t *map,
                   int offset)
{
#ifdef DEBUG
    Eterm *endp;
#endif
    Eterm res = NIL, tuple;
    Eterm *hp;
    int i;

    hp = HAlloc(c_p, map->logical_processors*(2+3));
#ifdef DEBUG
    endp = hp + map->logical_processors*(2+3);
#endif
    for (i = map->size - 1; i >= 0; i--) {
        if (map->array[i].logical >= 0) {
            tuple = TUPLE2(hp,
                           make_small(map->array[i].logical),
                           make_small(map->array[i].cpu_group + offset));
            hp += 3; /* 2-tuple: header + 2 elements */
            res = CONS(hp, tuple, res);
            hp += 2; /* cons cell */
        }
    }
    /* All reserved heap words must have been consumed. */
    ASSERT(hp == endp);
    return res;
}
1897
/*
 * Convert a (sorted) topology into the per-logical-CPU 'avail' array
 * used by the cpu group mapping, keeping only CPUs that are actually
 * available (unless 'test' is set, in which case all are kept). For
 * each level, avail[a].level[lvl] is a dense 0-based index of the
 * distinct (node, processor, ...) prefixes seen so far; no->level[]
 * ends up holding the count of distinct entities per level. *size is
 * updated to the number of available CPUs written to avail.
 */
static void
make_available_cpu_topology(erts_avail_cput *no,
                            erts_avail_cput *avail,
                            erts_cpu_topology_t *cpudata,
                            int *size,
                            int test)
{
    int len = *size;
    erts_cpu_topology_t last;
    int a, i, j;

    /* Counters start at -1; the trailing ++ below turns them into
     * counts of distinct entities. */
    no->level[ERTS_TOPOLOGY_NODE] = -1;
    no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
    no->level[ERTS_TOPOLOGY_CORE] = -1;
    no->level[ERTS_TOPOLOGY_THREAD] = -1;
    no->level[ERTS_TOPOLOGY_LOGICAL] = -1;

    /* Sentinel that can never match a real first entry. */
    last.node = INT_MIN;
    last.processor = INT_MIN;
    last.processor_node = INT_MIN;
    last.core = INT_MIN;
    last.thread = INT_MIN;
    last.logical = INT_MIN;

    a = 0;

    for (i = 0; i < len; i++) {

        if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical))
            continue;

        /* Find the highest level at which this entry differs from
         * the previous one, then bump that level's counter and —
         * via fall-through — every level below it. */
        if (last.node != cpudata[i].node)
            goto node;
        if (last.processor != cpudata[i].processor)
            goto processor;
        if (last.processor_node != cpudata[i].processor_node)
            goto processor_node;
        if (last.core != cpudata[i].core)
            goto core;
        ASSERT(last.thread != cpudata[i].thread);
        goto thread;

    node:
        no->level[ERTS_TOPOLOGY_NODE]++;
    processor:
        no->level[ERTS_TOPOLOGY_PROCESSOR]++;
    processor_node:
        no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
    core:
        no->level[ERTS_TOPOLOGY_CORE]++;
    thread:
        no->level[ERTS_TOPOLOGY_THREAD]++;

        no->level[ERTS_TOPOLOGY_LOGICAL]++;

        for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
            avail[a].level[j] = no->level[j];

        /* The logical slot keeps the real logical id, not an index. */
        avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
        avail[a].level[ERTS_TOPOLOGY_CG] = 0;

        ASSERT(last.logical != cpudata[i].logical);

        last = cpudata[i];
        a++;
    }

    /* Convert the 0-based max indices into entity counts. */
    no->level[ERTS_TOPOLOGY_NODE]++;
    no->level[ERTS_TOPOLOGY_PROCESSOR]++;
    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
    no->level[ERTS_TOPOLOGY_CORE]++;
    no->level[ERTS_TOPOLOGY_THREAD]++;
    no->level[ERTS_TOPOLOGY_LOGICAL]++;

    *size = a;
}
1975
1976 static void
cpu_group_insert(erts_cpu_groups_map_t * map,int logical,int cpu_group)1977 cpu_group_insert(erts_cpu_groups_map_t *map,
1978 int logical, int cpu_group)
1979 {
1980 int start = logical % map->size;
1981 int ix = start;
1982
1983 do {
1984 if (map->array[ix].logical < 0) {
1985 map->array[ix].logical = logical;
1986 map->array[ix].cpu_group = cpu_group;
1987 return;
1988 }
1989 ix++;
1990 if (ix == map->size)
1991 ix = 0;
1992 } while (ix != start);
1993
1994 erts_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
1995 }
1996
1997
/*
 * Count the number of distinct sub-level entities (level+1) within
 * the top-level entity cgc->id, scanning avail from index aix while
 * the top-level id matches. Stores the count in cgc->sub_levels,
 * zeroes cgc->cpu_groups, and returns the index of the first entry
 * belonging to the next top-level entity. Relies on avail being
 * grouped by level (as produced by make_available_cpu_topology()).
 */
static int
sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix,
           int avail_sz, erts_avail_cput *avail)
{
    int sub_level = level+1;
    int last = -1; /* sub-level indices are >= 0, so -1 never matches */
    cgc->sub_levels = 0;

    do {
        if (last != avail[aix].level[sub_level]) {
            cgc->sub_levels++;
            last = avail[aix].level[sub_level];
        }
        aix++;
    }
    while (aix < avail_sz && cgc->id == avail[aix].level[level]);
    cgc->cpu_groups = 0;
    return aix;
}
2017
/*
 * Assign cpu group numbers to the avail entries of one top-level
 * entity (cgcp->id at 'level'; level < 0 means the whole array),
 * distributing cgcp->sub_levels sub-level entities over
 * cgcp->cpu_groups groups as evenly as possible: the last 'xsl'
 * groups get one extra sub level each. *cgp is the last group number
 * used so far and is updated; returns the index of the first entry
 * of the next top-level entity.
 */
static int
write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp,
                 int level, int a,
                 int avail_sz, erts_avail_cput *avail)
{
    int cg = *cgp;
    int sub_level = level+1;
    int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups;
    int xsl = cgcp->sub_levels % cgcp->cpu_groups; /* groups given +1 */
    int sls = 0;   /* sub levels left in the current group */
    int last = -1; /* sub-level indices are >= 0 */
    int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1;

    ASSERT(level < 0 || avail[a].level[level] == cgcp->id);

    do {
        if (last != avail[a].level[sub_level]) {
            /* New sub-level entity; open a new group when the
             * current one has its quota. */
            if (!sls) {
                sls = sl_per_gr;
                cg++;
                if (cg >= xsl_cg_lim)
                    sls++; /* one of the groups taking an extra sub level */
            }
            last = avail[a].level[sub_level];
            sls--;
        }
        avail[a].level[ERTS_TOPOLOGY_CG] = cg;
        a++;
    } while (a < avail_sz && (level < 0
                              || avail[a].level[level] == cgcp->id));

    ASSERT(cgcp->cpu_groups == cg - *cgp);

    *cgp = cg;

    return a;
}
2055
2056 static int
cg_count_sub_levels_compare(const void * vx,const void * vy)2057 cg_count_sub_levels_compare(const void *vx, const void *vy)
2058 {
2059 erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
2060 erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
2061 if (x->sub_levels != y->sub_levels)
2062 return y->sub_levels - x->sub_levels;
2063 return x->id - y->id;
2064 }
2065
2066 static int
cg_count_id_compare(const void * vx,const void * vy)2067 cg_count_id_compare(const void *vx, const void *vy)
2068 {
2069 erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
2070 erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
2071 return x->id - y->id;
2072 }
2073
2074 static void
make_cpu_groups_map(erts_cpu_groups_map_t * map,int test)2075 make_cpu_groups_map(erts_cpu_groups_map_t *map, int test)
2076 {
2077 int i, spread_level, avail_sz;
2078 erts_avail_cput no, *avail;
2079 erts_cpu_topology_t *cpudata;
2080 ErtsAlcType_t alc_type = (test
2081 ? ERTS_ALC_T_TMP
2082 : ERTS_ALC_T_CPU_GRPS_MAP);
2083
2084 if (map->array)
2085 erts_free(alc_type, map->array);
2086
2087 map->array = NULL;
2088 map->logical_processors = 0;
2089 map->size = 0;
2090
2091 if (!map->groups)
2092 return;
2093
2094 create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
2095
2096 if (!cpudata)
2097 return;
2098
2099 cpu_bind_order_sort(cpudata,
2100 avail_sz,
2101 ERTS_CPU_BIND_NO_SPREAD,
2102 1);
2103
2104 avail = erts_alloc(ERTS_ALC_T_TMP,
2105 sizeof(erts_avail_cput)*avail_sz);
2106
2107 make_available_cpu_topology(&no, avail, cpudata,
2108 &avail_sz, test);
2109
2110 destroy_tmp_cpu_topology_copy(cpudata);
2111
2112 map->size = avail_sz*2+1;
2113
2114 map->array = erts_alloc(alc_type,
2115 (sizeof(erts_cpu_groups_map_array_t)
2116 * map->size));;
2117 map->logical_processors = avail_sz;
2118
2119 for (i = 0; i < map->size; i++) {
2120 map->array[i].logical = -1;
2121 map->array[i].cpu_group = -1;
2122 }
2123
2124 spread_level = ERTS_TOPOLOGY_CORE;
2125 for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
2126 if (no.level[i] > map->groups) {
2127 spread_level = i;
2128 break;
2129 }
2130 }
2131
2132 if (no.level[spread_level] <= map->groups) {
2133 int a, cg, last = -1;
2134 cg = -1;
2135 ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
2136 for (a = 0; a < avail_sz; a++) {
2137 if (last != avail[a].level[spread_level]) {
2138 cg++;
2139 last = avail[a].level[spread_level];
2140 }
2141 cpu_group_insert(map,
2142 avail[a].level[ERTS_TOPOLOGY_LOGICAL],
2143 cg);
2144 }
2145 }
2146 else { /* map->groups < no.level[spread_level] */
2147 erts_cpu_groups_count_t *cg_count;
2148 int a, cg, tl, toplevels;
2149
2150 tl = spread_level-1;
2151
2152 if (spread_level == ERTS_TOPOLOGY_NODE)
2153 toplevels = 1;
2154 else
2155 toplevels = no.level[tl];
2156
2157 cg_count = erts_alloc(ERTS_ALC_T_TMP,
2158 toplevels*sizeof(erts_cpu_groups_count_t));
2159
2160 if (toplevels == 1) {
2161 cg_count[0].id = 0;
2162 cg_count[0].sub_levels = no.level[spread_level];
2163 cg_count[0].cpu_groups = map->groups;
2164 }
2165 else {
2166 int cgs_per_tl, cgs;
2167 cgs = map->groups;
2168 cgs_per_tl = cgs / toplevels;
2169
2170 a = 0;
2171 for (i = 0; i < toplevels; i++) {
2172 cg_count[i].id = avail[a].level[tl];
2173 a = sub_levels(&cg_count[i], tl, a, avail_sz, avail);
2174 }
2175
2176 qsort(cg_count,
2177 toplevels,
2178 sizeof(erts_cpu_groups_count_t),
2179 cg_count_sub_levels_compare);
2180
2181 for (i = 0; i < toplevels; i++) {
2182 if (cg_count[i].sub_levels < cgs_per_tl) {
2183 cg_count[i].cpu_groups = cg_count[i].sub_levels;
2184 cgs -= cg_count[i].sub_levels;
2185 }
2186 else {
2187 cg_count[i].cpu_groups = cgs_per_tl;
2188 cgs -= cgs_per_tl;
2189 }
2190 }
2191
2192 while (cgs > 0) {
2193 for (i = 0; i < toplevels; i++) {
2194 if (cg_count[i].sub_levels == cg_count[i].cpu_groups)
2195 break;
2196 else {
2197 cg_count[i].cpu_groups++;
2198 if (--cgs == 0)
2199 break;
2200 }
2201 }
2202 }
2203
2204 qsort(cg_count,
2205 toplevels,
2206 sizeof(erts_cpu_groups_count_t),
2207 cg_count_id_compare);
2208 }
2209
2210 a = i = 0;
2211 cg = -1;
2212 while (a < avail_sz) {
2213 a = write_cpu_groups(&cg, &cg_count[i], tl,
2214 a, avail_sz, avail);
2215 i++;
2216 }
2217
2218 ASSERT(map->groups == cg + 1);
2219
2220 for (a = 0; a < avail_sz; a++)
2221 cpu_group_insert(map,
2222 avail[a].level[ERTS_TOPOLOGY_LOGICAL],
2223 avail[a].level[ERTS_TOPOLOGY_CG]);
2224
2225 erts_free(ERTS_ALC_T_TMP, cg_count);
2226 }
2227
2228 erts_free(ERTS_ALC_T_TMP, avail);
2229 }
2230
2231 static erts_cpu_groups_map_t *
add_cpu_groups(int groups,erts_cpu_groups_callback_t callback,void * arg)2232 add_cpu_groups(int groups,
2233 erts_cpu_groups_callback_t callback,
2234 void *arg)
2235 {
2236 int use_groups = groups;
2237 erts_cpu_groups_callback_list_t *cgcl;
2238 erts_cpu_groups_map_t *cgm;
2239
2240 ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
2241
2242 if (use_groups > max_main_threads)
2243 use_groups = max_main_threads;
2244
2245 if (!use_groups)
2246 return NULL;
2247
2248 no_cpu_groups_callbacks++;
2249 cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
2250 sizeof(erts_cpu_groups_callback_list_t));
2251 cgcl->callback = callback;
2252 cgcl->arg = arg;
2253
2254 for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
2255 if (cgm->groups == use_groups) {
2256 cgcl->next = cgm->callback_list;
2257 cgm->callback_list = cgcl;
2258 return cgm;
2259 }
2260 }
2261
2262
2263 cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
2264 sizeof(erts_cpu_groups_map_t));
2265 cgm->next = cpu_groups_maps;
2266 cgm->groups = use_groups;
2267 cgm->array = NULL;
2268 cgm->size = 0;
2269 cgm->logical_processors = 0;
2270 cgm->callback_list = cgcl;
2271
2272 cgcl->next = NULL;
2273
2274 make_cpu_groups_map(cgm, 0);
2275
2276 cpu_groups_maps = cgm;
2277
2278 return cgm;
2279 }
2280
2281 static int
cpu_groups_lookup(erts_cpu_groups_map_t * map,ErtsSchedulerData * esdp)2282 cpu_groups_lookup(erts_cpu_groups_map_t *map,
2283 ErtsSchedulerData *esdp)
2284 {
2285 int start, logical, ix;
2286
2287 ERTS_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
2288 || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
2289
2290 if (esdp->cpu_id < 0)
2291 return (((int) esdp->no) - 1) % map->groups;
2292
2293 logical = esdp->cpu_id;
2294 start = logical % map->size;
2295 ix = start;
2296
2297 do {
2298 if (map->array[ix].logical == logical) {
2299 int group = map->array[ix].cpu_group;
2300 ASSERT(0 <= group && group < map->groups);
2301 return group;
2302 }
2303 ix++;
2304 if (ix == map->size)
2305 ix = 0;
2306 } while (ix != start);
2307
2308 erts_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
2309 }
2310
2311 static void
update_cpu_groups_maps(void)2312 update_cpu_groups_maps(void)
2313 {
2314 erts_cpu_groups_map_t *cgm;
2315 ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
2316
2317 for (cgm = cpu_groups_maps; cgm; cgm = cgm->next)
2318 make_cpu_groups_map(cgm, 0);
2319 }
2320