1 //  Copyright (c) 2007-2017 Hartmut Kaiser
2 //  Copyright (c) 2008-2009 Chirag Dekate, Anshul Tandon
3 //  Copyright (c) 2012-2013 Thomas Heller
4 //
5 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
6 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #include <hpx/runtime/threads/topology.hpp>
9 
10 #include <hpx/compat/thread.hpp>
11 #include <hpx/error_code.hpp>
12 #include <hpx/exception.hpp>
13 #include <hpx/throw_exception.hpp>
14 #include <hpx/util/assert.hpp>
15 #include <hpx/util/format.hpp>
16 #include <hpx/util/logging.hpp>
17 #include <hpx/util/spinlock.hpp>
18 #include <hpx/runtime.hpp>
19 #include <hpx/runtime/naming/address.hpp>
20 #include <hpx/runtime/threads/cpu_mask.hpp>
21 #include <hpx/runtime/threads/topology.hpp>
22 
23 #include <boost/io/ios_state.hpp>
24 #include <boost/scoped_ptr.hpp>
25 
26 #include <cstddef>
#include <cstring>
27 #include <iomanip>
28 #include <iostream>
29 #include <mutex>
30 #include <string>
31 #include <vector>
32 #include <memory>
33 
34 #include <errno.h>
35 
36 #include <hwloc.h>
37 
38 #if HWLOC_API_VERSION < 0x00010b00
39 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
40 #endif
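// hwloc 1.11 renamed HWLOC_OBJ_NODE to HWLOC_OBJ_NUMANODE; the alias above
// lets the remainder of this file use the new name when building against
// older hwloc releases.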
41 
42 #if defined(__ANDROID__) && defined(ANDROID)
43 #include <cpu-features.h>
44 #endif
45 
46 #if defined(__bgq__)
47 #include <hwi/include/bqc/A2_inlines.h>
48 #endif
49 
50 #if defined(_POSIX_VERSION)
51 #include <sys/syscall.h>
52 #include <sys/resource.h>
53 #endif
54 
55 namespace hpx { namespace threads { namespace detail
56 {
57     std::size_t hwloc_hardware_concurrency()
58     {
59         threads::topology& top = threads::create_topology();
60         return top.get_number_of_pus();
61     }
62 
63     void write_to_log(char const* valuename, std::size_t value)
64     {
65         LTM_(debug) << "topology: "
66                     << valuename << ": " << value; //-V128
67     }
68 
69     void write_to_log_mask(char const* valuename, mask_cref_type value)
70     {
71         LTM_(debug) << "topology: " << valuename
72                     << ": " HPX_CPU_MASK_PREFIX
73                     << std::hex << value;
74     }
75 
76     void write_to_log(char const* valuename,
77         std::vector<std::size_t> const& values)
78     {
79         LTM_(debug) << "topology: "
80                     << valuename << "s, size: " //-V128
81                     << values.size();
82 
83         std::size_t i = 0;
84         for (std::size_t value : values)
85         {
86             LTM_(debug) << "topology: " << valuename //-V128
87                         << "(" << i++ << "): " << value;
88         }
89     }
90 
91     void write_to_log_mask(char const* valuename,
92         std::vector<mask_type> const& values)
93     {
94         LTM_(debug) << "topology: "
95                     << valuename << "s, size: " //-V128
96                     << values.size();
97 
98         std::size_t i = 0;
99         for (mask_cref_type value : values)
100         {
101             LTM_(debug) << "topology: " << valuename //-V128
102                         << "(" << i++ << "): " HPX_CPU_MASK_PREFIX
103                         << std::hex << value;
104         }
105     }
106 
107     std::size_t get_index(hwloc_obj_t obj)
108     {
109         // on Windows logical_index is always -1
110         if (obj->logical_index == ~0x0u)
111             return static_cast<std::size_t>(obj->os_index);
112 
113         return static_cast<std::size_t>(obj->logical_index);
114     }
115 
116     hwloc_obj_t adjust_node_obj(hwloc_obj_t node) noexcept
117     {
118 #if HWLOC_API_VERSION >= 0x00020000
119         // www.open-mpi.org/projects/hwloc/doc/hwloc-v2.0.0-letter.pdf:
120         // Starting with hwloc v2.0, NUMA nodes are not in the main tree
121         // anymore. They are attached under objects as Memory Children
122         // on the side of normal children.
123         while (hwloc_obj_type_is_memory(node->type))
124                 node = node->parent;
125         HPX_ASSERT(node);
126 #endif
127         return node;
128     }
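    // Note: callers below (e.g. get_number_of_numa_node_pus() and
    // init_numa_node_affinity_mask_from_numa_node()) pass NUMA objects through
    // adjust_node_obj() before walking their children, so that with
    // hwloc >= 2.0 the traversal starts from the normal parent that actually
    // owns the PUs.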
129 }}}
130 
131 namespace hpx { namespace threads
132 {
133     ///////////////////////////////////////////////////////////////////////////
134     std::ostream& operator<<(std::ostream& os, hpx_hwloc_bitmap_wrapper const* bmp)
135     {
136         char buffer[256];
137         hwloc_bitmap_snprintf(buffer, 256, bmp->bmp_);
138         os << buffer;
139         return os;
140     }
141 
142     ///////////////////////////////////////////////////////////////////////////
143     mask_type topology::get_service_affinity_mask(
144         mask_cref_type used_processing_units, error_code& ec) const
145     {
146         // We bind the service threads to the first NUMA domain. This is useful
147         // as the first NUMA domain typically hosts the PCI controllers.
148         mask_cref_type machine_mask = this->get_numa_node_affinity_mask(0, ec);
149         if (ec || !any(machine_mask))
150             return mask_type();
151 
152         if (&ec != &throws)
153             ec = make_success_code();
154 
155         mask_type res = ~used_processing_units & machine_mask;
156 
157         return (!any(res)) ? machine_mask : res;
158     }
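    // Illustrative use only (a sketch; 'topo' and 'worker_mask' are made-up
    // caller-side names): a runtime can reserve PUs for service threads by
    // subtracting the worker mask:
    //
    //   error_code ec;
    //   mask_type service = topo.get_service_affinity_mask(worker_mask, ec);
    //   if (!ec && any(service))
    //       topo.set_thread_affinity_mask(service, ec);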
159 
160     bool topology::reduce_thread_priority(error_code& ec) const
161     {
162 #ifdef HPX_HAVE_NICE_THREADLEVEL
163 #if defined(__linux__) && !defined(__ANDROID__) && !defined(__bgq__)
164         pid_t tid;
165         tid = syscall(SYS_gettid);
166         if (setpriority(PRIO_PROCESS, tid, 19))
167         {
168             HPX_THROWS_IF(ec, no_success, "topology::reduce_thread_priority",
169                 "setpriority returned an error");
170             return false;
171         }
172 #elif defined(WIN32) || defined(_WIN32) || defined(__WIN32__)
173 
174         if (!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_LOWEST))
175         {
176             HPX_THROWS_IF(ec, no_success, "topology::reduce_thread_priority",
177                 "SetThreadPriority returned an error");
178             return false;
179         }
180 #elif defined(__bgq__)
181         ThreadPriority_Low();
182 #endif
183 #endif
184         return true;
185     }
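    // On Linux, setpriority(PRIO_PROCESS, tid, 19) lowers the calling thread
    // to the weakest nice level; the Windows branch uses THREAD_PRIORITY_LOWEST
    // for the same effect. Both paths are compiled in only when
    // HPX_HAVE_NICE_THREADLEVEL is defined.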
186 
187     ///////////////////////////////////////////////////////////////////////////
188     mask_type topology::empty_mask = mask_type();
189 
190     topology::topology()
191       : topo(nullptr), machine_affinity_mask_(0)
192     { // {{{
193         int err = hwloc_topology_init(&topo);
194         if (err != 0)
195         {
196             HPX_THROW_EXCEPTION(no_success,
197                 "topology::topology",
198                 "Failed to init hwloc topology");
199         }
200 
201         err = hwloc_topology_load(topo);
202         if (err != 0)
203         {
204             HPX_THROW_EXCEPTION(no_success,
205                 "topology::topology",
206                 "Failed to load hwloc topology");
207         }
208 
209         init_num_of_pus();
210 
211         socket_numbers_.reserve(num_of_pus_);
212         numa_node_numbers_.reserve(num_of_pus_);
213         core_numbers_.reserve(num_of_pus_);
214 
215         // Initialize each set of data entirely, as some of the initialization
216         // routines rely on access to other pieces of topology data. The
217         // compiler will optimize the loops where possible anyway.
218 
219         std::size_t num_of_sockets = get_number_of_sockets();
220         if (num_of_sockets == 0) num_of_sockets = 1;
221 
222         for (std::size_t i = 0; i < num_of_pus_; ++i)
223         {
224             std::size_t socket = init_socket_number(i);
225             HPX_ASSERT(socket < num_of_sockets);
226             socket_numbers_.push_back(socket);
227         }
228 
229         std::size_t num_of_nodes = get_number_of_numa_nodes();
230         if (num_of_nodes == 0) num_of_nodes = 1;
231 
232         for (std::size_t i = 0; i < num_of_pus_; ++i)
233         {
234             std::size_t numa_node = init_numa_node_number(i);
235             HPX_ASSERT(numa_node < num_of_nodes);
236             numa_node_numbers_.push_back(numa_node);
237         }
238 
239         std::size_t num_of_cores = get_number_of_cores();
240         if (num_of_cores == 0) num_of_cores = 1;
241 
242         for (std::size_t i = 0; i < num_of_pus_; ++i)
243         {
244             std::size_t core_number = init_core_number(i);
245             HPX_ASSERT(core_number < num_of_cores);
246             core_numbers_.push_back(core_number);
247         }
248 
249         machine_affinity_mask_ = init_machine_affinity_mask();
250         socket_affinity_masks_.reserve(num_of_pus_);
251         numa_node_affinity_masks_.reserve(num_of_pus_);
252         core_affinity_masks_.reserve(num_of_pus_);
253         thread_affinity_masks_.reserve(num_of_pus_);
254 
255         for (std::size_t i = 0; i < num_of_pus_; ++i)
256         {
257             socket_affinity_masks_.push_back(init_socket_affinity_mask(i));
258         }
259 
260         for (std::size_t i = 0; i < num_of_pus_; ++i)
261         {
262             numa_node_affinity_masks_.push_back(init_numa_node_affinity_mask(i));
263         }
264 
265         for (std::size_t i = 0; i < num_of_pus_; ++i)
266         {
267             core_affinity_masks_.push_back(init_core_affinity_mask(i));
268         }
269 
270         for (std::size_t i = 0; i < num_of_pus_; ++i)
271         {
272             thread_affinity_masks_.push_back(init_thread_affinity_mask(i));
273         }
274     } // }}}
275 
276     void topology::write_to_log() const
277     {
278         std::size_t num_of_sockets = get_number_of_sockets();
279         if (num_of_sockets == 0) num_of_sockets = 1;
280         detail::write_to_log("num_sockets", num_of_sockets);
281 
282 
283         std::size_t num_of_nodes = get_number_of_numa_nodes();
284         if (num_of_nodes == 0) num_of_nodes = 1;
285         detail::write_to_log("num_of_nodes", num_of_nodes);
286 
287         std::size_t num_of_cores = get_number_of_cores();
288         if (num_of_cores == 0) num_of_cores = 1;
289         detail::write_to_log("num_of_cores", num_of_cores);
290 
291         detail::write_to_log("num_of_pus", num_of_pus_);
292 
293         detail::write_to_log("socket_number", socket_numbers_);
294         detail::write_to_log("numa_node_number", numa_node_numbers_);
295         detail::write_to_log("core_number", core_numbers_);
296 
297         detail::write_to_log_mask("machine_affinity_mask", machine_affinity_mask_);
298 
299         detail::write_to_log_mask("socket_affinity_mask", socket_affinity_masks_);
300         detail::write_to_log_mask("numa_node_affinity_mask", numa_node_affinity_masks_);
301         detail::write_to_log_mask("core_affinity_mask", core_affinity_masks_);
302         detail::write_to_log_mask("thread_affinity_mask", thread_affinity_masks_);
303     }
304 
305     topology::~topology()
306     {
307         if (topo)
308             hwloc_topology_destroy(topo);
309     }
310 
311     std::size_t topology::get_pu_number(
312         std::size_t num_core
313       , std::size_t num_pu
314       , error_code& ec
315         ) const
316     { // {{{
317         std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
318 
319         int num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);
320 
321         // A negative num_cores indicates an error. It should never be zero
322         // either (we divide by it below, and there is always at least one
323         // core).
324         if(num_cores <= 0)
325         {
326             HPX_THROWS_IF(ec, no_success,
327                 "topology::get_pu_number",
328                 "Failed to get number of cores");
329             return std::size_t(-1);
330         }
331         num_core %= num_cores; //-V101 //-V104 //-V107
332 
333         hwloc_obj_t core_obj;
334 
335         core_obj = hwloc_get_obj_by_type(topo,
336             HWLOC_OBJ_CORE, static_cast<unsigned>(num_core));
337 
338         num_pu %= core_obj->arity; //-V101 //-V104
339 
340         return std::size_t(core_obj->children[num_pu]->logical_index);
341     } // }}}
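    // Example (hypothetical machine with 2 cores and 2 PUs per core):
    // get_pu_number(1, 1) yields the logical index of the second PU of the
    // second core; both arguments wrap modulo the detected counts, so
    // get_pu_number(3, 5) addresses the same PU.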
342 
343     ///////////////////////////////////////////////////////////////////////////
344     mask_cref_type topology::get_machine_affinity_mask(
345         error_code& ec
346         ) const
347     {
348         if (&ec != &throws)
349             ec = make_success_code();
350 
351         return machine_affinity_mask_;
352     }
353 
354     mask_cref_type topology::get_socket_affinity_mask(
355         std::size_t num_thread
356       , error_code& ec
357         ) const
358     { // {{{
359         std::size_t num_pu = num_thread % num_of_pus_;
360 
361         if (num_pu < socket_affinity_masks_.size())
362         {
363             if (&ec != &throws)
364                 ec = make_success_code();
365 
366             return socket_affinity_masks_[num_pu];
367         }
368 
369         HPX_THROWS_IF(ec, bad_parameter
370           , "hpx::threads::topology::get_socket_affinity_mask"
371           , hpx::util::format(
372                 "thread number %1% is out of range",
373                 num_thread));
374         return empty_mask;
375     } // }}}
376 
377     mask_cref_type topology::get_numa_node_affinity_mask(
378         std::size_t num_thread
379       , error_code& ec
380         ) const
381     { // {{{
382         std::size_t num_pu = num_thread % num_of_pus_;
383 
384         if (num_pu < numa_node_affinity_masks_.size())
385         {
386             if (&ec != &throws)
387                 ec = make_success_code();
388 
389             return numa_node_affinity_masks_[num_pu];
390         }
391 
392         HPX_THROWS_IF(ec, bad_parameter
393           , "hpx::threads::topology::get_numa_node_affinity_mask"
394           , hpx::util::format(
395                 "thread number %1% is out of range",
396                 num_thread));
397         return empty_mask;
398     } // }}}
399 
400     mask_cref_type topology::get_core_affinity_mask(
401         std::size_t num_thread
402       , error_code& ec
403         ) const
404     {
405         std::size_t num_pu = num_thread % num_of_pus_;
406 
407         if (num_pu < core_affinity_masks_.size())
408         {
409             if (&ec != &throws)
410                 ec = make_success_code();
411 
412             return core_affinity_masks_[num_pu];
413         }
414 
415         HPX_THROWS_IF(ec, bad_parameter
416           , "hpx::threads::topology::get_core_affinity_mask"
417           , hpx::util::format(
418                 "thread number %1% is out of range",
419                 num_thread));
420         return empty_mask;
421     }
422 
423     mask_cref_type topology::get_thread_affinity_mask(
424         std::size_t num_thread
425       , error_code& ec
426         ) const
427     { // {{{
428         std::size_t num_pu = num_thread % num_of_pus_;
429 
430         if (num_pu < thread_affinity_masks_.size())
431         {
432             if (&ec != &throws)
433                 ec = make_success_code();
434 
435             return thread_affinity_masks_[num_pu];
436         }
437 
438         HPX_THROWS_IF(ec, bad_parameter
439           , "hpx::threads::topology::get_thread_affinity_mask"
440           , hpx::util::format(
441                 "thread number %1% is out of range",
442                 num_thread));
443         return empty_mask;
444     } // }}}
445 
446     ///////////////////////////////////////////////////////////////////////////
447     void topology::set_thread_affinity_mask(
448         mask_cref_type mask
449       , error_code& ec
450         ) const
451     { // {{{
452 
453 #if !defined(__APPLE__)
454         // setting thread affinities is not supported on OS X
455         hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
456 
457         int const pu_depth =
458             hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU);
459 
460         for (std::size_t i = 0; i != mask_size(mask); ++i)
461         {
462             if (test(mask, i))
463             {
464                 hwloc_obj_t const pu_obj =
465                     hwloc_get_obj_by_depth(topo, pu_depth, unsigned(i));
466                 HPX_ASSERT(i == detail::get_index(pu_obj));
467                 hwloc_bitmap_set(cpuset,
468                     static_cast<unsigned int>(pu_obj->os_index));
469             }
470         }
471 
472         {
473             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
474             if (hwloc_set_cpubind(topo, cpuset,
475                   HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_THREAD))
476             {
477                 // Strict binding not supported or failed, try weak binding.
478                 if (hwloc_set_cpubind(topo, cpuset, HWLOC_CPUBIND_THREAD))
479                 {
480                     std::unique_ptr<char[]> buffer(new char[1024]);
481 
482                     hwloc_bitmap_snprintf(buffer.get(), 1024, cpuset);
483                     hwloc_bitmap_free(cpuset);
484 
485                     HPX_THROWS_IF(ec, kernel_error
486                       , "hpx::threads::topology::set_thread_affinity_mask"
487                       , hpx::util::format(
488                             "failed to set thread affinity mask ("
489                             HPX_CPU_MASK_PREFIX "%x) for cpuset %s",
490                             mask, buffer.get()));
491                     return;
492                 }
493             }
494         }
495 #if defined(__linux) || defined(linux) || defined(__linux__) || defined(__FreeBSD__)
496         sleep(0);   // Allow the OS to pick up the change.
497 #endif
498         hwloc_bitmap_free(cpuset);
499 #endif  // __APPLE__
500 
501         if (&ec != &throws)
502             ec = make_success_code();
503     } // }}}
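    // Usage sketch (caller-side; 'topo' and 'worker_num' are illustrative
    // names): pin the calling thread to the PU assigned to a given worker:
    //
    //   error_code ec;
    //   mask_cref_type m = topo.get_thread_affinity_mask(worker_num, ec);
    //   if (!ec)
    //       topo.set_thread_affinity_mask(m, ec);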
504 
505     ///////////////////////////////////////////////////////////////////////////
506     mask_type topology::get_thread_affinity_mask_from_lva(
507         naming::address_type lva
508       , error_code& ec
509         ) const
510     { // {{{
511         if (&ec != &throws)
512             ec = make_success_code();
513 
514         hwloc_membind_policy_t policy = ::HWLOC_MEMBIND_DEFAULT;
515         hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
516 
517         {
518             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
519             int ret =
520 #if HWLOC_API_VERSION >= 0x00010b06
521                 hwloc_get_area_membind(topo, reinterpret_cast<void const*>(lva),
522                     1, nodeset, &policy, HWLOC_MEMBIND_BYNODESET);
523 #else
524                 hwloc_get_area_membind_nodeset(topo,
525                     reinterpret_cast<void const*>(lva), 1, nodeset, &policy, 0);
526 #endif
527 
528             if (-1 != ret)
529             {
530                 hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
531                 hwloc_cpuset_from_nodeset(topo, cpuset, nodeset);
532                 lk.unlock();
533 
534                 hwloc_bitmap_free(nodeset);
535 
536                 mask_type mask = mask_type();
537                 resize(mask, get_number_of_pus());
538 
539                 int const pu_depth =
540                     hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU);
541                 for (unsigned int i = 0; std::size_t(i) != num_of_pus_; ++i)
542                 {
543                     hwloc_obj_t const pu_obj =
544                         hwloc_get_obj_by_depth(topo, pu_depth, i);
545                     unsigned idx = static_cast<unsigned>(pu_obj->os_index);
546                     if (hwloc_bitmap_isset(cpuset, idx) != 0)
547                         set(mask, detail::get_index(pu_obj));
548                 }
549 
550                 hwloc_bitmap_free(cpuset);
551                 return mask;
552             }
553         }
554 
555         hwloc_bitmap_free(nodeset);
556         return empty_mask;
557     } // }}}
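    // The function above queries the memory binding of the page that contains
    // 'lva', converts the resulting NUMA nodeset into a cpuset, and reports it
    // as a PU mask; this can be used to schedule work close to its data.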
558 
559     std::size_t topology::init_numa_node_number(std::size_t num_thread)
560     {
561 #if HWLOC_API_VERSION >= 0x00020000
562         if (std::size_t(-1) == num_thread)
563             return std::size_t(-1);
564 
565         std::size_t num_pu = (num_thread + pu_offset) % num_of_pus_;
566 
567         hwloc_obj_t obj;
568         {
569             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
570             obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU,
571                 static_cast<unsigned>(num_pu));
572             HPX_ASSERT(num_pu == detail::get_index(obj));
573         }
574 
575         hwloc_obj_t tmp = nullptr;
576         while ((tmp = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_NUMANODE, tmp))
577                 != nullptr) {
578             if (hwloc_bitmap_intersects(tmp->cpuset, obj->cpuset)) {
579                 /* tmp matches, use it */
580                 return tmp->logical_index;
581             }
582         }
583         return 0;
584 #else
585         return init_node_number(num_thread, HWLOC_OBJ_NODE);
586 #endif
587     }
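    // With hwloc >= 2.0 NUMA nodes are no longer ancestors of PU objects, so
    // init_numa_node_number() above intersects cpusets instead of walking up
    // the parent chain as init_node_number() below does.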
588 
589     std::size_t topology::init_node_number(
590         std::size_t num_thread, hwloc_obj_type_t type
591         )
592     { // {{{
593         if (std::size_t(-1) == num_thread)
594             return std::size_t(-1);
595 
596         std::size_t num_pu = (num_thread + pu_offset) % num_of_pus_;
597 
598         {
599             hwloc_obj_t obj;
600 
601             {
602                 std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
603                 obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU,
604                     static_cast<unsigned>(num_pu));
605                 HPX_ASSERT(num_pu == detail::get_index(obj));
606             }
607 
608             while (obj)
609             {
610                 if (hwloc_compare_types(obj->type, type) == 0)
611                 {
612                     return detail::get_index(obj);
613                 }
614                 obj = obj->parent;
615             }
616         }
617 
618         return 0;
619     } // }}}
620 
621     void topology::extract_node_mask(
622         hwloc_obj_t parent
623       , mask_type& mask
624         ) const
625     { // {{{
626         hwloc_obj_t obj;
627 
628         {
629             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
630             obj = hwloc_get_next_child(topo, parent, nullptr);
631         }
632 
633         while (obj)
634         {
635             if (hwloc_compare_types(HWLOC_OBJ_PU, obj->type) == 0)
636             {
637                 do {
638                     set(mask, detail::get_index(obj)); //-V106
639                     {
640                         std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
641                         obj = hwloc_get_next_child(topo, parent, obj);
642                     }
643                 } while (obj != nullptr &&
644                          hwloc_compare_types(HWLOC_OBJ_PU, obj->type) == 0);
645                 return;
646             }
647 
648             extract_node_mask(obj, mask);
649 
650             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
651             obj = hwloc_get_next_child(topo, parent, obj);
652         }
653     } // }}}
654 
655     std::size_t topology::extract_node_count(
656         hwloc_obj_t parent
657       , hwloc_obj_type_t type
658       , std::size_t count
659         ) const
660     { // {{{
661         hwloc_obj_t obj;
662 
663         if(parent == nullptr) return count;
664 
665         {
666             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
667             obj = hwloc_get_next_child(topo, parent, nullptr);
668         }
669 
670         while (obj)
671         {
672             if (hwloc_compare_types(type, obj->type) == 0)
673             {
674                 /*
675                 do {
676                     ++count;
677                     {
678                         std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
679                         obj = hwloc_get_next_child(topo, parent, obj);
680                     }
681                 } while (obj != nullptr && hwloc_compare_types(type, obj->type) == 0);
682                 return count;
683                 */
684                 ++count;
685             }
686 
687             count = extract_node_count(obj, type, count);
688 
689             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
690             obj = hwloc_get_next_child(topo, parent, obj);
691         }
692 
693         return count;
694     } // }}}
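    // Both extract_node_mask() and extract_node_count() above acquire topo_mtx
    // only around individual hwloc calls and never hold it while recursing,
    // which matters because hpx::util::spinlock is not recursive.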
695 
696     std::size_t topology::get_number_of_sockets() const
697     {
698         int nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET);
699         if(0 > nobjs)
700         {
701             HPX_THROW_EXCEPTION(kernel_error
702               , "hpx::threads::topology::get_number_of_sockets"
703               , "hwloc_get_nbobjs_by_type failed");
704             return std::size_t(nobjs);
705         }
706         return std::size_t(nobjs);
707     }
708 
709     std::size_t topology::get_number_of_numa_nodes() const
710     {
711         int nobjs =  hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE);
712         if(0 > nobjs)
713         {
714             HPX_THROW_EXCEPTION(kernel_error
715               , "hpx::threads::topology::get_number_of_numa_nodes"
716               , "hwloc_get_nbobjs_by_type failed");
717             return std::size_t(nobjs);
718         }
719         return std::size_t(nobjs);
720     }
721 
722     std::size_t topology::get_number_of_cores() const
723     {
724         int nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);
725         // A negative count indicates an error
726         if (0 > nobjs)
727         {
728             HPX_THROW_EXCEPTION(kernel_error
729               , "hpx::threads::topology::get_number_of_cores"
730               , "hwloc_get_nbobjs_by_type(HWLOC_OBJ_CORE) failed");
731             return std::size_t(nobjs);
732         }
733         else if (0 == nobjs)
734         {
735             // some platforms report zero cores but might still report the
736             // number of PUs
737             nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
738             if (0 > nobjs)
739             {
740                 HPX_THROW_EXCEPTION(kernel_error
741                   , "hpx::threads::topology::get_number_of_cores"
742                   , "hwloc_get_nbobjs_by_type(HWLOC_OBJ_PU) failed");
743                 return std::size_t(nobjs);
744             }
745         }
746 
747         // The number of reported cores/PUs should never be zero either (we
748         // divide by it elsewhere); there is always at least one core.
749         if (0 == nobjs)
750         {
751             HPX_THROW_EXCEPTION(kernel_error
752               , "hpx::threads::topology::get_number_of_cores"
753               , "hwloc_get_nbobjs_by_type reports zero cores/pus");
754             return std::size_t(nobjs);
755         }
756 
757         return std::size_t(nobjs);
758     }
759 
760     std::size_t topology::get_number_of_socket_pus(
761         std::size_t num_socket
762         ) const
763     {
764         hwloc_obj_t socket_obj = nullptr;
765 
766         {
767             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
768             socket_obj = hwloc_get_obj_by_type(topo,
769                 HWLOC_OBJ_SOCKET, static_cast<unsigned>(num_socket));
770         }
771 
772         if (socket_obj)
773         {
774             HPX_ASSERT(num_socket == detail::get_index(socket_obj));
775             std::size_t pu_count = 0;
776             return extract_node_count(socket_obj, HWLOC_OBJ_PU, pu_count);
777         }
778 
779         return num_of_pus_;
780     }
781 
782     std::size_t topology::get_number_of_numa_node_pus(
783         std::size_t numa_node
784         ) const
785     {
786         hwloc_obj_t node_obj = nullptr;
787 
788         {
789             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
790             node_obj = hwloc_get_obj_by_type(topo,
791                 HWLOC_OBJ_NODE, static_cast<unsigned>(numa_node));
792         }
793 
794         if (node_obj)
795         {
796             HPX_ASSERT(numa_node == detail::get_index(node_obj));
797             std::size_t pu_count = 0;
798             node_obj = detail::adjust_node_obj(node_obj);
799             return extract_node_count(node_obj, HWLOC_OBJ_PU, pu_count);
800         }
801 
802         return num_of_pus_;
803     }
804 
805     std::size_t topology::get_number_of_core_pus(
806         std::size_t core
807         ) const
808     {
809         hwloc_obj_t core_obj = nullptr;
810 
811         {
812             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
813             core_obj = hwloc_get_obj_by_type(topo,
814                 HWLOC_OBJ_CORE, static_cast<unsigned>(core));
815         }
816 
817         if (core_obj)
818         {
819             HPX_ASSERT(core == detail::get_index(core_obj));
820             std::size_t pu_count = 0;
821             return extract_node_count(core_obj, HWLOC_OBJ_PU, pu_count);
822         }
823 
824         return num_of_pus_;
825     }
826 
827     std::size_t topology::get_number_of_socket_cores(
828         std::size_t num_socket
829         ) const
830     {
831         hwloc_obj_t socket_obj = nullptr;
832 
833         {
834             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
835             socket_obj = hwloc_get_obj_by_type(topo,
836                 HWLOC_OBJ_SOCKET, static_cast<unsigned>(num_socket));
837         }
838 
839         if (socket_obj)
840         {
841             HPX_ASSERT(num_socket == detail::get_index(socket_obj));
842             std::size_t pu_count = 0;
843             return extract_node_count(socket_obj, HWLOC_OBJ_CORE, pu_count);
844         }
845 
846         return get_number_of_cores();
847     }
848 
849     std::size_t topology::get_number_of_numa_node_cores(
850         std::size_t numa_node
851         ) const
852     {
853         hwloc_obj_t node_obj = nullptr;
854         {
855             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
856             node_obj = hwloc_get_obj_by_type(topo,
857                 HWLOC_OBJ_NODE, static_cast<unsigned>(numa_node));
858         }
859 
860         if (node_obj)
861         {
862             HPX_ASSERT(numa_node == detail::get_index(node_obj));
863             std::size_t pu_count = 0;
864             node_obj = detail::adjust_node_obj(node_obj);
865             return extract_node_count(node_obj, HWLOC_OBJ_CORE, pu_count);
866         }
867 
868         return get_number_of_cores();
869     }
870 
871     hwloc_bitmap_ptr topology::cpuset_to_nodeset(
872         mask_cref_type mask) const
873     {
874         hwloc_bitmap_t cpuset  = mask_to_bitmap(mask, HWLOC_OBJ_PU);
875         hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
876 #if HWLOC_API_VERSION >= 0x00020000
877         hwloc_cpuset_to_nodeset(topo, cpuset, nodeset);
878 #else
879         hwloc_cpuset_to_nodeset_strict(topo, cpuset, nodeset);
880 #endif
881         hwloc_bitmap_free(cpuset);
882         return std::make_shared<hpx::threads::hpx_hwloc_bitmap_wrapper>(nodeset);
883     }
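    // The temporary cpuset is freed here; ownership of the freshly allocated
    // nodeset passes to the returned hpx_hwloc_bitmap_wrapper, which is
    // expected to release it once the last shared_ptr reference goes away.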
884 
885     namespace detail
886     {
887         void print_info(std::ostream& os, hwloc_obj_t obj, char const* name,
888             bool comma)
889         {
890             if (comma)
891                 os << ", ";
892             os << name;
893 
894             if (obj->logical_index != ~0x0u)
895                 os << "L#" << obj->logical_index;
896             if (obj->os_index != ~0x0u)
897                 os << "(P#" << obj->os_index << ")";
898         }
899 
900         void print_info(std::ostream& os, hwloc_obj_t obj, bool comma = false)
901         {
902             switch (obj->type) {
903             case HWLOC_OBJ_PU:
904                 print_info(os, obj, "PU ", comma);
905                 break;
906 
907             case HWLOC_OBJ_CORE:
908                 print_info(os, obj, "Core ", comma);
909                 break;
910 
911             case HWLOC_OBJ_SOCKET:
912                 print_info(os, obj, "Socket ", comma);
913                 break;
914 
915             case HWLOC_OBJ_NODE:
916                 print_info(os, obj, "Node ", comma);
917                 break;
918 
919             default:
920                 break;
921             }
922         }
923     }
924 
925     void topology::print_affinity_mask(std::ostream& os,
926         std::size_t num_thread, mask_cref_type m, const std::string &pool_name) const
927     {
928         boost::io::ios_flags_saver ifs(os);
929         bool first = true;
930 
931         for(std::size_t i = 0; i != num_of_pus_; ++i)
932         {
933 
934             hwloc_obj_t obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, unsigned(i));
935             if (!obj)
936             {
937                 HPX_THROW_EXCEPTION(kernel_error
938                   , "hpx::threads::topology::print_affinity_mask"
939                   , "object not found");
940                 return;
941             }
942 
943             if(!test(m, detail::get_index(obj))) //-V106
944                 continue;
945 
946             if (first) {
947                 first = false;
948                 os << std::setw(4) << num_thread << ": "; //-V112 //-V128
949             }
950             else {
951                 os << "      ";
952             }
953 
954             detail::print_info(os, obj);
955 
956             while(obj->parent)
957             {
958                 detail::print_info(os, obj->parent, true);
959                 obj = obj->parent;
960             }
961 
962             os << ", on pool \"" << pool_name << "\"";
963 
964             os << std::endl;
965         }
966     }
967 
968     mask_type topology::init_machine_affinity_mask() const
969     { // {{{
970         mask_type machine_affinity_mask = mask_type();
971         resize(machine_affinity_mask, get_number_of_pus());
972 
973         hwloc_obj_t machine_obj;
974         {
975             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
976             machine_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_MACHINE, 0);
977         }
978         if (machine_obj)
979         {
980             extract_node_mask(machine_obj, machine_affinity_mask);
981             return machine_affinity_mask;
982         }
983 
984         HPX_THROW_EXCEPTION(kernel_error
985           , "hpx::threads::topology::init_machine_affinity_mask"
986           , "failed to initialize machine affinity mask");
987         return empty_mask;
988     } // }}}
989 
990     mask_type topology::init_socket_affinity_mask_from_socket(
991         std::size_t num_socket
992         ) const
993     { // {{{
994         // If we have only one or no socket, the socket affinity mask
995         // spans all processors
996         if (std::size_t(-1) == num_socket)
997             return machine_affinity_mask_;
998 
999         hwloc_obj_t socket_obj = nullptr;
1000         {
1001             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1002             socket_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET,
1003                 static_cast<unsigned>(num_socket));
1004         }
1005 
1006         if (socket_obj)
1007         {
1008             HPX_ASSERT(num_socket == detail::get_index(socket_obj));
1009 
1010             mask_type socket_affinity_mask = mask_type();
1011             resize(socket_affinity_mask, get_number_of_pus());
1012 
1013             extract_node_mask(socket_obj, socket_affinity_mask);
1014             return socket_affinity_mask;
1015         }
1016 
1017         return machine_affinity_mask_;
1018     } // }}}
1019 
1020     mask_type topology::init_numa_node_affinity_mask_from_numa_node(
1021         std::size_t numa_node
1022         ) const
1023     { // {{{
1024         // If we have only one or no NUMA domain, the NUMA affinity mask
1025         // spans all processors
1026         if (std::size_t(-1) == numa_node)
1027         {
1028             return machine_affinity_mask_;
1029         }
1030 
1031         hwloc_obj_t numa_node_obj = nullptr;
1032         {
1033             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1034             numa_node_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_NODE,
1035                 static_cast<unsigned>(numa_node));
1036         }
1037 
1038         if (numa_node_obj)
1039         {
1040             HPX_ASSERT(numa_node == detail::get_index(numa_node_obj));
1041             mask_type node_affinity_mask = mask_type();
1042             resize(node_affinity_mask, get_number_of_pus());
1043 
1044             numa_node_obj = detail::adjust_node_obj(numa_node_obj);
1045             extract_node_mask(numa_node_obj, node_affinity_mask);
1046             return node_affinity_mask;
1047         }
1048 
1049         return machine_affinity_mask_;
1050     } // }}}
1051 
1052     mask_type topology::init_core_affinity_mask_from_core(
1053         std::size_t core, mask_cref_type default_mask
1054         ) const
1055     { // {{{
1056         if (std::size_t(-1) == core)
1057             return default_mask;
1058 
1059         hwloc_obj_t core_obj = nullptr;
1060 
1061         std::size_t num_core = (core + core_offset) % get_number_of_cores();
1062 
1063         {
1064             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1065             core_obj = hwloc_get_obj_by_type(topo,
1066                 HWLOC_OBJ_CORE, static_cast<unsigned>(num_core));
1067         }
1068 
1069         if (core_obj)
1070         {
1071             HPX_ASSERT(num_core == detail::get_index(core_obj));
1072             mask_type core_affinity_mask = mask_type();
1073             resize(core_affinity_mask, get_number_of_pus());
1074 
1075             extract_node_mask(core_obj, core_affinity_mask);
1076             return core_affinity_mask;
1077         }
1078 
1079         return default_mask;
1080     } // }}}
1081 
1082     mask_type topology::init_thread_affinity_mask(
1083         std::size_t num_thread
1084         ) const
1085     { // {{{
1086 
1087         if (std::size_t(-1) == num_thread)
1088         {
1089             return get_core_affinity_mask(num_thread);
1090         }
1091 
1092         std::size_t num_pu = (num_thread + pu_offset) % num_of_pus_;
1093 
1094         hwloc_obj_t obj = nullptr;
1095 
1096         {
1097             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1098             obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU,
1099                     static_cast<unsigned>(num_pu));
1100         }
1101 
1102         if (!obj)
1103         {
1104             return get_core_affinity_mask(num_thread);
1105         }
1106 
1107         HPX_ASSERT(num_pu == detail::get_index(obj));
1108         mask_type mask = mask_type();
1109         resize(mask, get_number_of_pus());
1110 
1111         set(mask, detail::get_index(obj)); //-V106
1112 
1113         return mask;
1114     } // }}}
1115 
1116     mask_type topology::init_thread_affinity_mask(
1117         std::size_t num_core,
1118         std::size_t num_pu
1119         ) const
1120     { // {{{
1121         hwloc_obj_t obj = nullptr;
1122 
1123         {
1124             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1125             int num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);
1126             // A negative num_cores indicates an error. It should never be zero
1127             // either (we divide by it below, and there is always at least one
1128             // core).
1129             if (num_cores <= 0) {
1130                 HPX_THROW_EXCEPTION(kernel_error
1131                   , "hpx::threads::topology::init_thread_affinity_mask"
1132                   , "hwloc_get_nbobjs_by_type failed");
1133                 return empty_mask;
1134             }
1135 
1136             num_core = (num_core + core_offset) % std::size_t(num_cores);
1137             obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE,
1138                     static_cast<unsigned>(num_core));
1139         }
1140 
1141         if (!obj)
1142             return empty_mask;//get_core_affinity_mask(num_thread, false);
1143 
1144         HPX_ASSERT(num_core == detail::get_index(obj));
1145 
1146         num_pu %= obj->arity; //-V101 //-V104
1147 
1148         mask_type mask = mask_type();
1149         resize(mask, get_number_of_pus());
1150 
1151         set(mask, detail::get_index(obj->children[num_pu])); //-V106
1152 
1153         return mask;
1154     } // }}}
1155 
1156     ///////////////////////////////////////////////////////////////////////////
1157     void topology::init_num_of_pus()
1158     {
1159         num_of_pus_ = 1;
1160         {
1161             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1162             int num_of_pus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
1163 
1164             if (num_of_pus > 0)
1165             {
1166                 num_of_pus_ = static_cast<std::size_t>(num_of_pus);
1167             }
1168         }
1169     }
1170 
1171     std::size_t topology::get_number_of_pus() const
1172     {
1173         return num_of_pus_;
1174     }
1175 
1176     ///////////////////////////////////////////////////////////////////////////
1177     mask_type topology::get_cpubind_mask(error_code& ec) const
1178     {
1179         hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
1180 
1181         mask_type mask = mask_type();
1182         resize(mask, get_number_of_pus());
1183 
1184         {
1185             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1186             if (hwloc_get_cpubind(topo, cpuset, HWLOC_CPUBIND_THREAD))
1187             {
1188                 hwloc_bitmap_free(cpuset);
1189                 HPX_THROWS_IF(ec, kernel_error
1190                   , "hpx::threads::topology::get_cpubind_mask"
1191                   , "hwloc_get_cpubind failed");
1192                 return empty_mask;
1193             }
1194 
1195             int const pu_depth = hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU);
1196             for (unsigned int i = 0; i != num_of_pus_; ++i) //-V104
1197             {
1198                 hwloc_obj_t const pu_obj = hwloc_get_obj_by_depth(topo, pu_depth, i);
1199                 unsigned idx = static_cast<unsigned>(pu_obj->os_index);
1200                 if (hwloc_bitmap_isset(cpuset, idx) != 0)
1201                     set(mask, detail::get_index(pu_obj));
1202             }
1203         }
1204 
1205         hwloc_bitmap_free(cpuset);
1206 
1207         if (&ec != &throws)
1208             ec = make_success_code();
1209 
1210         return mask;
1211     }
1212 
1213     mask_type topology::get_cpubind_mask(compat::thread& handle,
1214         error_code& ec) const
1215     {
1216         hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
1217 
1218         mask_type mask = mask_type();
1219         resize(mask, get_number_of_pus());
1220 
1221         {
1222             std::unique_lock<hpx::util::spinlock> lk(topo_mtx);
1223 #if defined(HPX_MINGW)
1224             if (hwloc_get_thread_cpubind(topo,
1225                     pthread_gethandle(handle.native_handle()), cpuset,
1226                     HWLOC_CPUBIND_THREAD))
1227 #else
1228             if (hwloc_get_thread_cpubind(topo, handle.native_handle(), cpuset,
1229                     HWLOC_CPUBIND_THREAD))
1230 #endif
1231             {
1232                 hwloc_bitmap_free(cpuset);
1233                 HPX_THROWS_IF(ec, kernel_error
1234                   , "hpx::threads::topology::get_cpubind_mask"
1235                   , "hwloc_get_cpubind failed");
1236                 return empty_mask;
1237             }
1238 
1239             int const pu_depth = hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU);
1240             for (unsigned int i = 0; i != num_of_pus_; ++i) //-V104
1241             {
1242                 hwloc_obj_t const pu_obj =
1243                     hwloc_get_obj_by_depth(topo, pu_depth, i);
1244                 unsigned idx = static_cast<unsigned>(pu_obj->os_index);
1245                 if (hwloc_bitmap_isset(cpuset, idx) != 0)
1246                     set(mask, detail::get_index(pu_obj));
1247             }
1248         }
1249 
1250         hwloc_bitmap_free(cpuset);
1251 
1252         if (&ec != &throws)
1253             ec = make_success_code();
1254 
1255         return mask;
1256     }
1257 
1258 
1259     ///////////////////////////////////////////////////////////////////////////
1260     /// This is equivalent to malloc(), except that it tries to allocate
1261     /// page-aligned memory from the OS.
1262     void* topology::allocate(std::size_t len) const
1263     {
1264         return hwloc_alloc(topo, len);
1265     }
1266 
1267     ///////////////////////////////////////////////////////////////////////////
1268     /// Allocate some memory on NUMA memory nodes specified by nodeset
1269     /// as specified by the hwloc hwloc_alloc_membind_nodeset call
1270     void* topology::allocate_membind(std::size_t len,
1271         hwloc_bitmap_ptr bitmap,
1272         hpx_hwloc_membind_policy policy, int flags) const
1273     {
1274         return
1275 #if HWLOC_API_VERSION >= 0x00010b06
1276             hwloc_alloc_membind(topo, len, bitmap->get_bmp(),
1277                 (hwloc_membind_policy_t)(policy),
1278                 flags | HWLOC_MEMBIND_BYNODESET);
1279 #else
1280             hwloc_alloc_membind_nodeset(topo, len, bitmap->get_bmp(),
1281                 (hwloc_membind_policy_t)(policy), flags);
1282 #endif
1283     }
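    // Usage sketch (assumptions: 'topo', 'pu_mask' and 'size' are provided by
    // the caller): allocate a buffer bound to the NUMA nodes covering a PU
    // mask, then release it again:
    //
    //   hwloc_bitmap_ptr nodes = topo.cpuset_to_nodeset(pu_mask);
    //   void* p = topo.allocate_membind(size, nodes,
    //       static_cast<hpx_hwloc_membind_policy>(HWLOC_MEMBIND_BIND), 0);
    //   // ... use p ...
    //   topo.deallocate(p, size);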
1284 
1285     bool topology::set_area_membind_nodeset(
1286         const void *addr, std::size_t len, void *nodeset) const
1287     {
1288 #if !defined(__APPLE__)
1289         hwloc_membind_policy_t policy = ::HWLOC_MEMBIND_BIND;
1290         hwloc_nodeset_t ns = reinterpret_cast<hwloc_nodeset_t>(nodeset);
1291         int ret =
1292 #if HWLOC_API_VERSION >= 0x00010b06
1293             hwloc_set_area_membind(
1294                 topo, addr, len, ns, policy, HWLOC_MEMBIND_BYNODESET);
1295 #else
1296             hwloc_set_area_membind_nodeset(topo, addr, len, ns, policy, 0);
1297 #endif
1298 
1299         if (ret < 0)
1300         {
1301             std::string msg = std::strerror(errno);
1302             if (errno == ENOSYS) msg = "the action is not supported";
1303             if (errno == EXDEV)  msg = "the binding cannot be enforced";
1304             HPX_THROW_EXCEPTION(kernel_error
1305               , "hpx::threads::topology::set_area_membind_nodeset"
1306               , "hwloc_set_area_membind_nodeset failed : " + msg);
1307             return false;
1308         }
1309 #endif
1310         return true;
1311     }
1312 
1313     util::thread_specific_ptr<hpx_hwloc_bitmap_wrapper, topology::tls_tag>
1314         topology::bitmap_storage_;
1315 
1316     threads::mask_type topology::get_area_membind_nodeset(
1317         const void *addr, std::size_t len) const
1318     {
1319         hpx_hwloc_bitmap_wrapper *nodeset = topology::bitmap_storage_.get();
1320         if (nullptr == nodeset)
1321         {
1322             hwloc_bitmap_t nodeset_ = hwloc_bitmap_alloc();
1323             topology::bitmap_storage_.reset(new hpx_hwloc_bitmap_wrapper(nodeset_));
1324             nodeset = topology::bitmap_storage_.get();
1325         }
1326         //
1327         hwloc_membind_policy_t policy;
1328         hwloc_nodeset_t ns = reinterpret_cast<hwloc_nodeset_t>(nodeset->get_bmp());
1329 
1330         if (
1331 #if HWLOC_API_VERSION >= 0x00010b06
1332             hwloc_get_area_membind(
1333                 topo, addr, len, ns, &policy, HWLOC_MEMBIND_BYNODESET)
1334 #else
1335             hwloc_get_area_membind_nodeset(topo, addr, len, ns, &policy, 0)
1336 #endif
1337             == -1)
1338         {
1339             HPX_THROW_EXCEPTION(kernel_error,
1340                 "hpx::threads::topology::get_area_membind_nodeset",
1341                 "hwloc_get_area_membind_nodeset failed");
1342             return empty_mask;
1344         }
1345         return bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE);
1346     }
1347 
1348     int topology::get_numa_domain(const void *addr) const
1349     {
1350 #if HWLOC_API_VERSION >= 0x00010b06
1351         hpx_hwloc_bitmap_wrapper *nodeset = topology::bitmap_storage_.get();
1352         if (nullptr == nodeset)
1353         {
1354             hwloc_bitmap_t nodeset_ = hwloc_bitmap_alloc();
1355             topology::bitmap_storage_.reset(new hpx_hwloc_bitmap_wrapper(nodeset_));
1356             nodeset = topology::bitmap_storage_.get();
1357         }
1358         //
1359         hwloc_nodeset_t ns = reinterpret_cast<hwloc_nodeset_t>(nodeset->get_bmp());
1360 
1361         int ret = hwloc_get_area_memlocation(topo, addr, 1,  ns,
1362             HWLOC_MEMBIND_BYNODESET);
1363         if (ret<0) {
1364             std::string msg(std::strerror(errno));
1365             HPX_THROW_EXCEPTION(kernel_error
1366               , "hpx::threads::topology::get_numa_domain"
1367               , "hwloc_get_area_memlocation failed " + msg);
1368             return -1;
1369         }
1370         threads::mask_type mask = bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE);
1371         return static_cast<int>(threads::find_first(mask));
1372 #else
1373         return 0;
1374 #endif
1375     }
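    // get_numa_domain() above reports the NUMA node on which the page
    // containing 'addr' currently resides (via hwloc_get_area_memlocation);
    // with hwloc versions older than the 1.11.6 cut-off used above it simply
    // returns domain 0.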
1376 
1377     /// Free memory that was previously allocated by allocate
1378     void topology::deallocate(void* addr, std::size_t len) const
1379     {
1380         hwloc_free(topo, addr, len);
1381     }
1382 
1383     ///////////////////////////////////////////////////////////////////////////
1384     hwloc_bitmap_t topology::mask_to_bitmap(mask_cref_type mask,
1385         hwloc_obj_type_t htype) const
1386     {
1387         hwloc_bitmap_t bitmap = hwloc_bitmap_alloc();
1388         hwloc_bitmap_zero(bitmap);
1389         //
1390         int const depth =
1391             hwloc_get_type_or_below_depth(topo, htype);
1392 
1393         for (std::size_t i = 0; i != mask_size(mask); ++i) {
1394             if (test(mask, i)) {
1395                 hwloc_obj_t const hw_obj =
1396                     hwloc_get_obj_by_depth(topo, depth, unsigned(i));
1397                 HPX_ASSERT(i == detail::get_index(hw_obj));
1398                 hwloc_bitmap_set(bitmap,
1399                     static_cast<unsigned int>(hw_obj->os_index));
1400             }
1401         }
1402         return bitmap;
1403     }
1404 
1405     ///////////////////////////////////////////////////////////////////////////
1406     mask_type topology::bitmap_to_mask(hwloc_bitmap_t bitmap,
1407         hwloc_obj_type_t htype) const
1408     {
1409         mask_type mask = mask_type();
1410         std::size_t num = hwloc_get_nbobjs_by_type(topo, htype);
1411         //
1412         int const pu_depth = hwloc_get_type_or_below_depth(topo, htype);
1413         for (unsigned int i=0; std::size_t(i)!=num; ++i) //-V104
1414         {
1415             hwloc_obj_t const pu_obj =
1416                 hwloc_get_obj_by_depth(topo, pu_depth, i);
1417             unsigned idx = static_cast<unsigned>(pu_obj->os_index);
1418             if (hwloc_bitmap_isset(bitmap, idx) != 0)
1419                 set(mask, detail::get_index(pu_obj));
1420         }
1421         return mask;
1422     }
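    // mask_to_bitmap()/bitmap_to_mask() translate between the two index spaces
    // used in this file: HPX masks are keyed by hwloc logical indices
    // (detail::get_index), whereas hwloc bitmaps are keyed by OS indices
    // (obj->os_index).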
1423 
1424     ///////////////////////////////////////////////////////////////////////////
1425     void topology::print_mask_vector(std::ostream& os,
1426         std::vector<mask_type> const& v) const
1427     {
1428         std::size_t s = v.size();
1429         if (s == 0)
1430         {
1431             os << "(empty)\n";
1432             return;
1433         }
1434 
1435         for (std::size_t i = 0; i != s; i++)
1436         {
1437             os << std::hex << HPX_CPU_MASK_PREFIX << v[i] << "\n";
1438         }
1439         os << "\n";
1440     }
1441 
1442     void topology::print_vector(
1443         std::ostream& os, std::vector<std::size_t> const& v) const
1444     {
1445         std::size_t s = v.size();
1446         if (s == 0)
1447         {
1448             os << "(empty)\n";
1449             return;
1450         }
1451 
1452         os << v[0];
1453         for (std::size_t i = 1; i != s; i++)
1454         {
1455             os << ", " << std::dec << v[i];
1456         }
1457         os << "\n";
1458     }
1459 
1460     void topology::print_hwloc(std::ostream& os) const
1461     {
1462         os << "[HWLOC topology info] number of ...\n" << std::dec
1463            << "number of sockets     : " << get_number_of_sockets()
1464            << "\n"
1465            << "number of numa nodes  : " << get_number_of_numa_nodes()
1466            << "\n"
1467            << "number of cores       : " << get_number_of_cores() << "\n"
1468            << "number of PUs         : " << get_number_of_pus() << "\n"
1469            << "hardware concurrency  : "
1470            << hpx::threads::hardware_concurrency() << "\n" << std::endl;
1471         //! -------------------------------------- topology (affinity masks)
1472         os << "[HWLOC topology info] affinity masks :\n"
1473            << "machine               : \n"
1474            << std::hex << HPX_CPU_MASK_PREFIX
1475            << machine_affinity_mask_ << "\n";
1476 
1477         os << "socket                : \n";
1478         print_mask_vector(os, socket_affinity_masks_);
1479         os << "numa node             : \n";
1480         print_mask_vector(os, numa_node_affinity_masks_);
1481         os << "core                  : \n";
1482         print_mask_vector(os, core_affinity_masks_);
1483         os << "PUs (/threads)        : \n";
1484         print_mask_vector(os, thread_affinity_masks_);
1485 
1486         //! -------------------------------------- topology (numbers)
1487         os << "[HWLOC topology info] resource numbers :\n";
1488         os << "socket                : \n";
1489         print_vector(os, socket_numbers_);
1490         os << "numa node             : \n";
1491         print_vector(os, numa_node_numbers_);
1492         os << "core                  : \n";
1493         print_vector(os, core_numbers_);
1494         //os << "PUs (/threads)        : \n";
1495         //print_vector(os, pu_numbers_);
1496     }
1497 
1498     topology const& get_topology()
1499     {
1500         hpx::runtime* rt = hpx::get_runtime_ptr();
1501         if (rt == nullptr)
1502         {
1503             HPX_THROW_EXCEPTION(invalid_status, "hpx::threads::get_topology",
1504                 "the hpx runtime system has not been initialized yet");
1505         }
1506         return rt->get_topology();
1507     }
1508 
1509     ///////////////////////////////////////////////////////////////////////////
1510     struct hardware_concurrency_tag {};
1511 
1512     struct hw_concurrency
1513     {
1514         hw_concurrency()
1515 #if defined(__ANDROID__) && defined(ANDROID)
1516           : num_of_cores_(::android_getCpuCount())
1517 #else
1518           : num_of_cores_(detail::hwloc_hardware_concurrency())
1519 #endif
1520         {
1521             if (num_of_cores_ == 0)
1522                 num_of_cores_ = 1;
1523         }
1524 
1525         std::size_t num_of_cores_;
1526     };
1527 
1528     std::size_t hardware_concurrency()
1529     {
1530         util::static_<hw_concurrency, hardware_concurrency_tag> hwc;
1531         return hwc.get().num_of_cores_;
1532     }
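    // hardware_concurrency() caches the PU count in a util::static_ singleton,
    // so the hwloc topology is consulted only once for this purpose per
    // process.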
1533 }}
1534 
1535