1 /*
2 * kmp_affinity.h -- header for affinity management
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef KMP_AFFINITY_H
14 #define KMP_AFFINITY_H
15
16 #include "kmp.h"
17 #include "kmp_os.h"
18
19 #if KMP_AFFINITY_SUPPORTED
20 #if KMP_USE_HWLOC
21 class KMPHwlocAffinity : public KMPAffinity {
22 public:
23 class Mask : public KMPAffinity::Mask {
24 hwloc_cpuset_t mask;
25
26 public:
Mask()27 Mask() {
28 mask = hwloc_bitmap_alloc();
29 this->zero();
30 }
~Mask()31 ~Mask() { hwloc_bitmap_free(mask); }
set(int i)32 void set(int i) override { hwloc_bitmap_set(mask, i); }
is_set(int i)33 bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
clear(int i)34 void clear(int i) override { hwloc_bitmap_clr(mask, i); }
zero()35 void zero() override { hwloc_bitmap_zero(mask); }
copy(const KMPAffinity::Mask * src)36 void copy(const KMPAffinity::Mask *src) override {
37 const Mask *convert = static_cast<const Mask *>(src);
38 hwloc_bitmap_copy(mask, convert->mask);
39 }
bitwise_and(const KMPAffinity::Mask * rhs)40 void bitwise_and(const KMPAffinity::Mask *rhs) override {
41 const Mask *convert = static_cast<const Mask *>(rhs);
42 hwloc_bitmap_and(mask, mask, convert->mask);
43 }
bitwise_or(const KMPAffinity::Mask * rhs)44 void bitwise_or(const KMPAffinity::Mask *rhs) override {
45 const Mask *convert = static_cast<const Mask *>(rhs);
46 hwloc_bitmap_or(mask, mask, convert->mask);
47 }
bitwise_not()48 void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
begin()49 int begin() const override { return hwloc_bitmap_first(mask); }
end()50 int end() const override { return -1; }
next(int previous)51 int next(int previous) const override {
52 return hwloc_bitmap_next(mask, previous);
53 }
get_system_affinity(bool abort_on_error)54 int get_system_affinity(bool abort_on_error) override {
55 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
56 "Illegal get affinity operation when not capable");
57 int retval =
58 hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
59 if (retval >= 0) {
60 return 0;
61 }
62 int error = errno;
63 if (abort_on_error) {
64 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
65 }
66 return error;
67 }
set_system_affinity(bool abort_on_error)68 int set_system_affinity(bool abort_on_error) const override {
69 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
70 "Illegal get affinity operation when not capable");
71 int retval =
72 hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
73 if (retval >= 0) {
74 return 0;
75 }
76 int error = errno;
77 if (abort_on_error) {
78 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
79 }
80 return error;
81 }
get_proc_group()82 int get_proc_group() const override {
83 int group = -1;
84 #if KMP_OS_WINDOWS
85 if (__kmp_num_proc_groups == 1) {
86 return 1;
87 }
88 for (int i = 0; i < __kmp_num_proc_groups; i++) {
89 // On windows, the long type is always 32 bits
90 unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
91 unsigned long second_32_bits =
92 hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
93 if (first_32_bits == 0 && second_32_bits == 0) {
94 continue;
95 }
96 if (group >= 0) {
97 return -1;
98 }
99 group = i;
100 }
101 #endif /* KMP_OS_WINDOWS */
102 return group;
103 }
104 };
determine_capable(const char * var)105 void determine_capable(const char *var) override {
106 const hwloc_topology_support *topology_support;
107 if (__kmp_hwloc_topology == NULL) {
108 if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
109 __kmp_hwloc_error = TRUE;
110 if (__kmp_affinity_verbose)
111 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
112 }
113 if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
114 __kmp_hwloc_error = TRUE;
115 if (__kmp_affinity_verbose)
116 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
117 }
118 }
119 topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
120 // Is the system capable of setting/getting this thread's affinity?
121 // Also, is topology discovery possible? (pu indicates ability to discover
122 // processing units). And finally, were there no errors when calling any
123 // hwloc_* API functions?
124 if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
125 topology_support->cpubind->get_thisthread_cpubind &&
126 topology_support->discovery->pu && !__kmp_hwloc_error) {
127 // enables affinity according to KMP_AFFINITY_CAPABLE() macro
128 KMP_AFFINITY_ENABLE(TRUE);
129 } else {
130 // indicate that hwloc didn't work and disable affinity
131 __kmp_hwloc_error = TRUE;
132 KMP_AFFINITY_DISABLE();
133 }
134 }
bind_thread(int which)135 void bind_thread(int which) override {
136 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
137 "Illegal set affinity operation when not capable");
138 KMPAffinity::Mask *mask;
139 KMP_CPU_ALLOC_ON_STACK(mask);
140 KMP_CPU_ZERO(mask);
141 KMP_CPU_SET(which, mask);
142 __kmp_set_system_affinity(mask, TRUE);
143 KMP_CPU_FREE_FROM_STACK(mask);
144 }
allocate_mask()145 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
deallocate_mask(KMPAffinity::Mask * m)146 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
allocate_mask_array(int num)147 KMPAffinity::Mask *allocate_mask_array(int num) override {
148 return new Mask[num];
149 }
deallocate_mask_array(KMPAffinity::Mask * array)150 void deallocate_mask_array(KMPAffinity::Mask *array) override {
151 Mask *hwloc_array = static_cast<Mask *>(array);
152 delete[] hwloc_array;
153 }
index_mask_array(KMPAffinity::Mask * array,int index)154 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
155 int index) override {
156 Mask *hwloc_array = static_cast<Mask *>(array);
157 return &(hwloc_array[index]);
158 }
get_api_type()159 api_type get_api_type() const override { return HWLOC; }
160 };
161 #endif /* KMP_USE_HWLOC */
162
163 #if KMP_OS_LINUX
164 /* On some of the older OS's that we build on, these constants aren't present
165 in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
166 all systems of the same arch where they are defined, and they cannot change.
167 stone forever. */
168 #include <sys/syscall.h>
169 #if KMP_ARCH_X86 || KMP_ARCH_ARM
170 #ifndef __NR_sched_setaffinity
171 #define __NR_sched_setaffinity 241
172 #elif __NR_sched_setaffinity != 241
173 #error Wrong code for setaffinity system call.
174 #endif /* __NR_sched_setaffinity */
175 #ifndef __NR_sched_getaffinity
176 #define __NR_sched_getaffinity 242
177 #elif __NR_sched_getaffinity != 242
178 #error Wrong code for getaffinity system call.
179 #endif /* __NR_sched_getaffinity */
180 #elif KMP_ARCH_AARCH64
181 #ifndef __NR_sched_setaffinity
182 #define __NR_sched_setaffinity 122
183 #elif __NR_sched_setaffinity != 122
184 #error Wrong code for setaffinity system call.
185 #endif /* __NR_sched_setaffinity */
186 #ifndef __NR_sched_getaffinity
187 #define __NR_sched_getaffinity 123
188 #elif __NR_sched_getaffinity != 123
189 #error Wrong code for getaffinity system call.
190 #endif /* __NR_sched_getaffinity */
191 #elif KMP_ARCH_X86_64
192 #ifndef __NR_sched_setaffinity
193 #define __NR_sched_setaffinity 203
194 #elif __NR_sched_setaffinity != 203
195 #error Wrong code for setaffinity system call.
196 #endif /* __NR_sched_setaffinity */
197 #ifndef __NR_sched_getaffinity
198 #define __NR_sched_getaffinity 204
199 #elif __NR_sched_getaffinity != 204
200 #error Wrong code for getaffinity system call.
201 #endif /* __NR_sched_getaffinity */
202 #elif KMP_ARCH_PPC64
203 #ifndef __NR_sched_setaffinity
204 #define __NR_sched_setaffinity 222
205 #elif __NR_sched_setaffinity != 222
206 #error Wrong code for setaffinity system call.
207 #endif /* __NR_sched_setaffinity */
208 #ifndef __NR_sched_getaffinity
209 #define __NR_sched_getaffinity 223
210 #elif __NR_sched_getaffinity != 223
211 #error Wrong code for getaffinity system call.
212 #endif /* __NR_sched_getaffinity */
213 #elif KMP_ARCH_MIPS
214 #ifndef __NR_sched_setaffinity
215 #define __NR_sched_setaffinity 4239
216 #elif __NR_sched_setaffinity != 4239
217 #error Wrong code for setaffinity system call.
218 #endif /* __NR_sched_setaffinity */
219 #ifndef __NR_sched_getaffinity
220 #define __NR_sched_getaffinity 4240
221 #elif __NR_sched_getaffinity != 4240
222 #error Wrong code for getaffinity system call.
223 #endif /* __NR_sched_getaffinity */
224 #elif KMP_ARCH_MIPS64
225 #ifndef __NR_sched_setaffinity
226 #define __NR_sched_setaffinity 5195
227 #elif __NR_sched_setaffinity != 5195
228 #error Wrong code for setaffinity system call.
229 #endif /* __NR_sched_setaffinity */
230 #ifndef __NR_sched_getaffinity
231 #define __NR_sched_getaffinity 5196
232 #elif __NR_sched_getaffinity != 5196
233 #error Wrong code for getaffinity system call.
234 #endif /* __NR_sched_getaffinity */
235 #error Unknown or unsupported architecture
236 #endif /* KMP_ARCH_* */
237 class KMPNativeAffinity : public KMPAffinity {
238 class Mask : public KMPAffinity::Mask {
239 typedef unsigned char mask_t;
240 static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
241
242 public:
243 mask_t *mask;
Mask()244 Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
~Mask()245 ~Mask() {
246 if (mask)
247 __kmp_free(mask);
248 }
set(int i)249 void set(int i) override {
250 mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
251 }
is_set(int i)252 bool is_set(int i) const override {
253 return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
254 }
clear(int i)255 void clear(int i) override {
256 mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
257 }
zero()258 void zero() override {
259 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
260 mask[i] = 0;
261 }
copy(const KMPAffinity::Mask * src)262 void copy(const KMPAffinity::Mask *src) override {
263 const Mask *convert = static_cast<const Mask *>(src);
264 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
265 mask[i] = convert->mask[i];
266 }
bitwise_and(const KMPAffinity::Mask * rhs)267 void bitwise_and(const KMPAffinity::Mask *rhs) override {
268 const Mask *convert = static_cast<const Mask *>(rhs);
269 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
270 mask[i] &= convert->mask[i];
271 }
bitwise_or(const KMPAffinity::Mask * rhs)272 void bitwise_or(const KMPAffinity::Mask *rhs) override {
273 const Mask *convert = static_cast<const Mask *>(rhs);
274 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
275 mask[i] |= convert->mask[i];
276 }
bitwise_not()277 void bitwise_not() override {
278 for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
279 mask[i] = ~(mask[i]);
280 }
begin()281 int begin() const override {
282 int retval = 0;
283 while (retval < end() && !is_set(retval))
284 ++retval;
285 return retval;
286 }
end()287 int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
next(int previous)288 int next(int previous) const override {
289 int retval = previous + 1;
290 while (retval < end() && !is_set(retval))
291 ++retval;
292 return retval;
293 }
get_system_affinity(bool abort_on_error)294 int get_system_affinity(bool abort_on_error) override {
295 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
296 "Illegal get affinity operation when not capable");
297 int retval =
298 syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
299 if (retval >= 0) {
300 return 0;
301 }
302 int error = errno;
303 if (abort_on_error) {
304 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
305 }
306 return error;
307 }
set_system_affinity(bool abort_on_error)308 int set_system_affinity(bool abort_on_error) const override {
309 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
310 "Illegal get affinity operation when not capable");
311 int retval =
312 syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
313 if (retval >= 0) {
314 return 0;
315 }
316 int error = errno;
317 if (abort_on_error) {
318 __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
319 }
320 return error;
321 }
322 };
determine_capable(const char * env_var)323 void determine_capable(const char *env_var) override {
324 __kmp_affinity_determine_capable(env_var);
325 }
bind_thread(int which)326 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
allocate_mask()327 KMPAffinity::Mask *allocate_mask() override {
328 KMPNativeAffinity::Mask *retval = new Mask();
329 return retval;
330 }
deallocate_mask(KMPAffinity::Mask * m)331 void deallocate_mask(KMPAffinity::Mask *m) override {
332 KMPNativeAffinity::Mask *native_mask =
333 static_cast<KMPNativeAffinity::Mask *>(m);
334 delete native_mask;
335 }
allocate_mask_array(int num)336 KMPAffinity::Mask *allocate_mask_array(int num) override {
337 return new Mask[num];
338 }
deallocate_mask_array(KMPAffinity::Mask * array)339 void deallocate_mask_array(KMPAffinity::Mask *array) override {
340 Mask *linux_array = static_cast<Mask *>(array);
341 delete[] linux_array;
342 }
index_mask_array(KMPAffinity::Mask * array,int index)343 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
344 int index) override {
345 Mask *linux_array = static_cast<Mask *>(array);
346 return &(linux_array[index]);
347 }
get_api_type()348 api_type get_api_type() const override { return NATIVE_OS; }
349 };
350 #endif /* KMP_OS_LINUX */
351
352 #if KMP_OS_WINDOWS
// Native Windows affinity implementation. The mask is an array of ULONG_PTR
// words, one word per Windows processor group, which maps directly onto the
// GROUP_AFFINITY representation used by the group-aware Win32 calls.
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask; // one mask_t word per processor group

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    // Linear-scan iteration over set bits; end() is one past the last bit.
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    // Bind the calling thread to this mask. With multiple processor groups the
    // mask must be confined to exactly one group (a Windows restriction).
    // Returns 0 on success, -1 for an invalid (multi-group/empty) mask, or a
    // Win32 error code on failure (or aborts if requested).
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        // Single group: the plain (non-group-aware) API suffices.
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    // Fill this mask with the calling thread's current affinity.
    // Returns 0 on success, -1 for an unusable group affinity, or a Win32
    // error code on failure (or aborts if requested).
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // NOTE(review): the upper-bound check uses '>'; '>=' looks intended
        // since valid group indices are 0..__kmp_num_proc_groups-1 — confirm.
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // SetThreadAffinityMask returns the thread's *previous* mask, so set
        // the process mask to discover the thread's own mask...
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // ...then immediately restore the original thread mask.
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval; // the thread's original affinity mask
      }
      return 0;
    }
    // Return the single processor group this mask occupies: 1 when only one
    // group exists, the group index when exactly one group has bits set, or
    // -1 when the mask spans several groups or is empty.
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
534 #endif /* KMP_OS_WINDOWS */
535 #endif /* KMP_AFFINITY_SUPPORTED */
536
537 class Address {
538 public:
539 static const unsigned maxDepth = 32;
540 unsigned labels[maxDepth];
541 unsigned childNums[maxDepth];
542 unsigned depth;
543 unsigned leader;
Address(unsigned _depth)544 Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
545 Address &operator=(const Address &b) {
546 depth = b.depth;
547 for (unsigned i = 0; i < depth; i++) {
548 labels[i] = b.labels[i];
549 childNums[i] = b.childNums[i];
550 }
551 leader = FALSE;
552 return *this;
553 }
554 bool operator==(const Address &b) const {
555 if (depth != b.depth)
556 return false;
557 for (unsigned i = 0; i < depth; i++)
558 if (labels[i] != b.labels[i])
559 return false;
560 return true;
561 }
isClose(const Address & b,int level)562 bool isClose(const Address &b, int level) const {
563 if (depth != b.depth)
564 return false;
565 if ((unsigned)level >= depth)
566 return true;
567 for (unsigned i = 0; i < (depth - level); i++)
568 if (labels[i] != b.labels[i])
569 return false;
570 return true;
571 }
572 bool operator!=(const Address &b) const { return !operator==(b); }
print()573 void print() const {
574 unsigned i;
575 printf("Depth: %u --- ", depth);
576 for (i = 0; i < depth; i++) {
577 printf("%u ", labels[i]);
578 }
579 }
580 };
581
582 class AddrUnsPair {
583 public:
584 Address first;
585 unsigned second;
AddrUnsPair(Address _first,unsigned _second)586 AddrUnsPair(Address _first, unsigned _second)
587 : first(_first), second(_second) {}
588 AddrUnsPair &operator=(const AddrUnsPair &b) {
589 first = b.first;
590 second = b.second;
591 return *this;
592 }
print()593 void print() const {
594 printf("first = ");
595 first.print();
596 printf(" --- second = %u", second);
597 }
598 bool operator==(const AddrUnsPair &b) const {
599 if (first != b.first)
600 return false;
601 if (second != b.second)
602 return false;
603 return true;
604 }
605 bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
606 };
607
__kmp_affinity_cmp_Address_labels(const void * a,const void * b)608 static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
609 const Address *aa = &(((const AddrUnsPair *)a)->first);
610 const Address *bb = &(((const AddrUnsPair *)b)->first);
611 unsigned depth = aa->depth;
612 unsigned i;
613 KMP_DEBUG_ASSERT(depth == bb->depth);
614 for (i = 0; i < depth; i++) {
615 if (aa->labels[i] < bb->labels[i])
616 return -1;
617 if (aa->labels[i] > bb->labels[i])
618 return 1;
619 }
620 return 0;
621 }
622
623 /* A structure for holding machine-specific hierarchy info to be computed once
624 at init. This structure represents a mapping of threads to the actual machine
625 hierarchy, or to our best guess at what the hierarchy might be, for the
626 purpose of performing an efficient barrier. In the worst case, when there is
627 no machine hierarchy information, it produces a tree suitable for a barrier,
628 similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. Typical levels are threads/core,
      cores/package or socket, packages/node, nodes/machine, etc. We don't want
      to get specific with nomenclature. When the machine is oversubscribed we
      add levels to duplicate the hierarchy, doubling the thread capacity of the
      hierarchy each time we add a level. */
  kmp_uint32 maxLevels;

  /** This is specifically the depth of the machine configuration hierarchy, in
      terms of the number of levels along the longest path from root to any
      leaf. It corresponds to the number of entries in numPerLevel if we exclude
      all but one trailing 1. */
  kmp_uint32 depth;
  // Number of threads the current arrays were built (or last resized) for.
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has. For example, if we have a machine
      with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
      {2, 4, 4, 1, 1}. All empty levels are set to 1. */
  kmp_uint32 *numPerLevel;
  // skipPerLevel[i] is the number of leaves spanned by one node at level i
  // (cumulative product of numPerLevel); it lives in the second half of the
  // numPerLevel allocation.
  kmp_uint32 *skipPerLevel;

  // Fill numPerLevel from a table of addresses: for each topology level
  // (scanned innermost-first) record the largest childNums value observed
  // across all addresses, plus one.
  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  // Release the level arrays and return to the not_initialized state so a
  // later init() can rebuild the hierarchy.
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  // Build the hierarchy for num_addrs threads from adr2os; when adr2os is
  // NULL a synthetic tree with maxLeaves leaves per node is fabricated.
  // Exactly one thread wins the CAS and performs initialization; concurrent
  // callers spin until it publishes 'initialized'.
  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage of
       dirty value observed when static library is re-initialized multiple times
       (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
       OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    // Single allocation holds both arrays: numPerLevel then skipPerLevel.
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      // No topology information: fabricate a two-level tree.
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        // Halve this level's fanout and push the surplus up to the parent
        // level, adding a level if the parent was previously empty.
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    // skipPerLevel[i] = cumulative product of the fanouts below level i.
    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      // Each added level doubles the leaf capacity of the hierarchy.
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels;
           ++i) { // init numPerLevel[*] to 1 item per level
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
827 #endif // KMP_AFFINITY_H
828