1 /*
2 Copyright (c) 2005-2020 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 #include "../tbb/tbb_assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
18 #include "tbb/tbb_stddef.h"
19
20 #if _MSC_VER && !__INTEL_COMPILER
21 #pragma warning( push )
22 #pragma warning( disable : 4100 )
23 #endif
24 #include <hwloc.h>
25 #if _MSC_VER && !__INTEL_COMPILER
26 #pragma warning( pop )
27 #endif
28
29 #include <vector>
30
// Most hwloc calls return a negative exit code on error.
// This macro checks error codes that are returned from the hwloc interfaces.
33 #define assertion_hwloc_wrapper(command, ...) \
34 __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");
35
36 namespace tbb {
37 namespace internal {
38
39 //------------------------------------------------------------------------
40 // Information about the machine's hardware TBB is happen to work on
41 //------------------------------------------------------------------------
// Singleton that parses the machine topology via hwloc and exposes
// per-NUMA-node CPU affinity masks plus default concurrency levels.
// Not thread-safe by itself; initialize() is expected to be called before
// any other member (NOTE(review): serialization of callers happens outside
// this class — confirm against the callers of initialize_numa_topology).
class platform_topology {
    friend class numa_affinity_handler;

    // TODO: add the `my_` prefix to the members
    hwloc_topology_t topology;                       // hwloc topology handle; valid once state >= topology_allocated
    hwloc_cpuset_t process_cpu_affinity_mask;        // CPUs the current process is allowed to run on
    hwloc_nodeset_t process_node_affinity_mask;      // NUMA nodes covered by the process affinity
    std::vector<hwloc_cpuset_t> affinity_masks_list; // per-node CPU masks, indexed by hwloc logical node index

    std::vector<int> default_concurrency_list;       // per-node CPU count, indexed by hwloc logical node index
    std::vector<int> numa_indexes_list;              // logical indexes of the NUMA nodes available to the process
    int numa_nodes_count;                            // number of NUMA nodes visible to the process (1 in stub mode)

    // Progress of initialize(); each later stage implies the earlier ones succeeded.
    enum init_stages { uninitialized,
                       started,
                       topology_allocated,
                       topology_loaded,
                       topology_parsed } initialization_state;

    // Binding threads to NUMA nodes located in other Windows Processor Groups
    // is allowed only if the machine topology contains several Windows Processor
    // Groups and the process affinity mask was not limited manually (an affinity
    // mask cannot cross processor group boundaries).
    bool intergroup_binding_allowed(size_t groups_num) { return groups_num > 1; }

    // Private: instances are obtained only through instance().
    platform_topology() : topology(NULL),
                          process_cpu_affinity_mask(NULL),
                          process_node_affinity_mask(NULL),
                          numa_nodes_count(0),
                          initialization_state(uninitialized) {}

public:
    typedef hwloc_cpuset_t affinity_mask;
    typedef hwloc_const_cpuset_t const_affinity_mask;

    // Meyers singleton accessor.
    static platform_topology& instance() {
        static platform_topology topology;
        return topology;
    }

    // True only when initialize() completed the full parsing path
    // (or the no-NUMA fallback that still fills all the lists).
    bool is_topology_parsed() { return initialization_state == topology_parsed; }

    // Parses the machine topology. Idempotent: repeated calls are no-ops.
    // On hwloc failure, fills the output lists with stub values (-1) so that
    // fill() still works, but leaves the state below topology_parsed.
    // groups_num: number of Windows Processor Groups (1 on other platforms).
    void initialize( size_t groups_num ) {
        if ( initialization_state != uninitialized )
            return;
        initialization_state = started;

        // Parse topology
        if ( hwloc_topology_init( &topology ) == 0 ) {
            initialization_state = topology_allocated;
            if ( hwloc_topology_load( topology ) == 0 ) {
                initialization_state = topology_loaded;
            }
        }

        // Fill parameters with stubs if topology parsing is broken.
        if ( initialization_state != topology_loaded ) {
            if ( initialization_state == topology_allocated ) {
                hwloc_topology_destroy(topology);
            }
            numa_nodes_count = 1;
            numa_indexes_list.push_back(-1);
            default_concurrency_list.push_back(-1);
            return;
        }

        // Getting process affinity mask
        if ( intergroup_binding_allowed(groups_num) ) {
            // Multiple processor groups: take the complete machine sets, since
            // a single-group affinity mask would hide the other groups' CPUs.
            process_cpu_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
        } else {
            // Respect the (possibly manually restricted) process binding.
            process_cpu_affinity_mask = hwloc_bitmap_alloc();
            process_node_affinity_mask = hwloc_bitmap_alloc();

            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
        }

        // If system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
        // hwloc_bitmap_weight() returns negative value for such bitmaps, so we use this check
        // to change way of topology initialization.
        if (hwloc_bitmap_weight(process_node_affinity_mask) < 0) {
            // Pretend the whole machine is one NUMA node (logical index 0).
            numa_nodes_count = 1;
            numa_indexes_list.push_back(0);
            default_concurrency_list.push_back(hwloc_bitmap_weight(process_cpu_affinity_mask));

            affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
            initialization_state = topology_parsed;
            return;
        }

        // Get number of available NUMA nodes
        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
        __TBB_ASSERT(numa_nodes_count > 0, "Any system must contain one or more NUMA nodes");

        // Get NUMA logical indexes list
        unsigned counter = 0;
        int i = 0;
        int max_numa_index = -1;
        numa_indexes_list.resize(numa_nodes_count);
        hwloc_obj_t node_buffer;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);

            if ( numa_indexes_list[counter] > max_numa_index ) {
                max_numa_index = numa_indexes_list[counter];
            }

            counter++;
        } hwloc_bitmap_foreach_end();
        __TBB_ASSERT(max_numa_index >= 0, "Maximal NUMA index must not be negative");

        // Fill concurrency and affinity masks lists.
        // The lists are indexed by logical node index, so their size is
        // max_numa_index + 1; gaps (if any) stay value-initialized.
        default_concurrency_list.resize(max_numa_index + 1);
        affinity_masks_list.resize(max_numa_index + 1);

        int index = 0;
        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
            index = static_cast<int>(node_buffer->logical_index);

            hwloc_cpuset_t& current_mask = affinity_masks_list[index];
            current_mask = hwloc_bitmap_dup(node_buffer->cpuset);

            // Clip the node's CPU set to what the process may actually use.
            hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
            __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected unavailable NUMA node");
            default_concurrency_list[index] = hwloc_bitmap_weight(current_mask);
        } hwloc_bitmap_foreach_end();
        initialization_state = topology_parsed;
    }

    // Releases every hwloc bitmap and the topology object that the reached
    // initialization stage proves were allocated.
    ~platform_topology() {
        if ( is_topology_parsed() ) {
            for (int i = 0; i < numa_nodes_count; i++) {
                hwloc_bitmap_free(affinity_masks_list[numa_indexes_list[i]]);
            }
            hwloc_bitmap_free(process_node_affinity_mask);
            hwloc_bitmap_free(process_cpu_affinity_mask);
        }

        if ( initialization_state >= topology_allocated ) {
            hwloc_topology_destroy(topology);
        }

        initialization_state = uninitialized;
    }

    // Copies out the parsed topology. The pointers alias internal vectors and
    // stay valid for the lifetime of the singleton; callers must not free them.
    void fill(int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        nodes_count = numa_nodes_count;
        indexes_list = &numa_indexes_list.front();
        concurrency_list = &default_concurrency_list.front();
    }

    // Returns a freshly duplicated process-wide CPU mask;
    // the caller owns it and must release it via free_affinity_mask().
    affinity_mask allocate_process_affinity_mask() {
        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
        return hwloc_bitmap_dup(process_cpu_affinity_mask);
    }

    void free_affinity_mask( affinity_mask mask_to_free ) {
        hwloc_bitmap_free(mask_to_free); // If bitmap is NULL, no operation is performed.
    }

    // Reads the calling thread's current CPU binding into current_mask,
    // clipped to the process affinity mask.
    void store_current_affinity_mask( affinity_mask current_mask ) {
        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);

        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
            "Current affinity mask must intersects with process affinity mask");
    }

    // Binds the calling thread to the given CPU mask.
    void set_new_affinity_mask( const_affinity_mask new_mask ) {
        assertion_hwloc_wrapper(hwloc_set_cpubind, topology, new_mask, HWLOC_CPUBIND_THREAD);
    }

    // Returns the (internally owned) CPU mask of the node with the given
    // logical index; the caller must not modify or free it.
    const_affinity_mask get_node_affinity_mask( int node_index ) {
        __TBB_ASSERT((int)affinity_masks_list.size() > node_index,
            "Trying to get affinity mask for uninitialized NUMA node");
        return affinity_masks_list[node_index];
    }
};
224
225 class binding_handler {
226 // Following vector saves thread affinity mask on scheduler entry to return it to this thread
227 // on scheduler exit.
228 typedef std::vector<platform_topology::affinity_mask> affinity_masks_container;
229 affinity_masks_container affinity_backup;
230
231 public:
binding_handler(size_t size)232 binding_handler( size_t size ) : affinity_backup(size) {
233 for (affinity_masks_container::iterator it = affinity_backup.begin();
234 it != affinity_backup.end(); it++) {
235 *it = platform_topology::instance().allocate_process_affinity_mask();
236 }
237 }
238
~binding_handler()239 ~binding_handler() {
240 for (affinity_masks_container::iterator it = affinity_backup.begin();
241 it != affinity_backup.end(); it++) {
242 platform_topology::instance().free_affinity_mask(*it);
243 }
244 }
245
bind_thread_to_node(unsigned slot_num,unsigned numa_node_id)246 void bind_thread_to_node( unsigned slot_num, unsigned numa_node_id ) {
247 __TBB_ASSERT(slot_num < affinity_backup.size(),
248 "The slot number is greater than the number of slots in the arena");
249 __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
250 "Trying to get access to uninitialized platform_topology");
251 platform_topology::instance().store_current_affinity_mask(affinity_backup[slot_num]);
252
253 platform_topology::instance().set_new_affinity_mask(
254 platform_topology::instance().get_node_affinity_mask(numa_node_id));
255 }
256
restore_previous_affinity_mask(unsigned slot_num)257 void restore_previous_affinity_mask( unsigned slot_num ) {
258 __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
259 "Trying to get access to uninitialized platform_topology");
260 platform_topology::instance().set_new_affinity_mask(affinity_backup[slot_num]);
261 };
262
263 };
264
265 extern "C" { // exported to TBB interfaces
266
initialize_numa_topology(size_t groups_num,int & nodes_count,int * & indexes_list,int * & concurrency_list)267 void initialize_numa_topology( size_t groups_num,
268 int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
269 platform_topology::instance().initialize(groups_num);
270 platform_topology::instance().fill(nodes_count, indexes_list, concurrency_list);
271 }
272
allocate_binding_handler(int slot_num)273 binding_handler* allocate_binding_handler(int slot_num) {
274 __TBB_ASSERT(slot_num > 0, "Trying to create numa handler for 0 threads.");
275 return new binding_handler(slot_num);
276 }
277
deallocate_binding_handler(binding_handler * handler_ptr)278 void deallocate_binding_handler(binding_handler* handler_ptr) {
279 __TBB_ASSERT(handler_ptr != NULL, "Trying to deallocate NULL pointer.");
280 delete handler_ptr;
281 }
282
bind_to_node(binding_handler * handler_ptr,int slot_num,int numa_id)283 void bind_to_node(binding_handler* handler_ptr, int slot_num, int numa_id) {
284 __TBB_ASSERT(handler_ptr != NULL, "Trying to get access to uninitialized metadata.");
285 __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
286 "Trying to get access to uninitialized platform_topology.");
287 handler_ptr->bind_thread_to_node(slot_num, numa_id);
288 }
289
restore_affinity(binding_handler * handler_ptr,int slot_num)290 void restore_affinity(binding_handler* handler_ptr, int slot_num) {
291 __TBB_ASSERT(handler_ptr != NULL, "Trying to get access to uninitialized metadata.");
292 __TBB_ASSERT(platform_topology::instance().is_topology_parsed(),
293 "Trying to get access to uninitialized platform_topology.");
294 handler_ptr->restore_previous_affinity_mask(slot_num);
295 }
296
297 } // extern "C"
298
299 } // namespace internal
300 } // namespace tbb
301
302 #undef assertion_hwloc_wrapper
303