1 // Copyright (c) 2007-2017 Hartmut Kaiser 2 // Copyright (c) 2008-2009 Chirag Dekate, Anshul Tandon 3 // Copyright (c) 2012-2013 Thomas Heller 4 // 5 // Distributed under the Boost Software License, Version 1.0. (See accompanying 6 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #include <hpx/runtime/threads/topology.hpp> 9 10 #include <hpx/compat/thread.hpp> 11 #include <hpx/error_code.hpp> 12 #include <hpx/exception.hpp> 13 #include <hpx/throw_exception.hpp> 14 #include <hpx/util/assert.hpp> 15 #include <hpx/util/format.hpp> 16 #include <hpx/util/logging.hpp> 17 #include <hpx/util/spinlock.hpp> 18 #include <hpx/runtime.hpp> 19 #include <hpx/runtime/naming/address.hpp> 20 #include <hpx/runtime/threads/cpu_mask.hpp> 21 #include <hpx/runtime/threads/topology.hpp> 22 23 #include <boost/io/ios_state.hpp> 24 #include <boost/scoped_ptr.hpp> 25 26 #include <cstddef> 27 #include <iomanip> 28 #include <iostream> 29 #include <mutex> 30 #include <string> 31 #include <vector> 32 #include <memory> 33 34 #include <errno.h> 35 36 #include <hwloc.h> 37 38 #if HWLOC_API_VERSION < 0x00010b00 39 # define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE 40 #endif 41 42 #if defined(__ANDROID__) && defined(ANDROID) 43 #include <cpu-features.h> 44 #endif 45 46 #if defined(__bgq__) 47 #include <hwi/include/bqc/A2_inlines.h> 48 #endif 49 50 #if defined(_POSIX_VERSION) 51 #include <sys/syscall.h> 52 #include <sys/resource.h> 53 #endif 54 55 namespace hpx { namespace threads { namespace detail 56 { hwloc_hardware_concurrency()57 std::size_t hwloc_hardware_concurrency() 58 { 59 threads::topology& top = threads::create_topology(); 60 return top.get_number_of_pus(); 61 } 62 write_to_log(char const * valuename,std::size_t value)63 void write_to_log(char const* valuename, std::size_t value) 64 { 65 LTM_(debug) << "topology: " 66 << valuename << ": " << value; //-V128 67 } 68 write_to_log_mask(char const * valuename,mask_cref_type value)69 void write_to_log_mask(char const* valuename, mask_cref_type value) 70 { 71 LTM_(debug) << "topology: " << valuename 72 << ": " HPX_CPU_MASK_PREFIX 73 << std::hex << value; 74 } 75 write_to_log(char const * valuename,std::vector<std::size_t> const & values)76 void write_to_log(char const* valuename, 77 std::vector<std::size_t> const& values) 78 { 79 LTM_(debug) << "topology: " 80 << valuename << "s, size: " //-V128 81 << values.size(); 82 83 std::size_t i = 0; 84 for (std::size_t value : values) 85 { 86 LTM_(debug) << "topology: " << valuename //-V128 87 << "(" << i++ << "): " << value; 88 } 89 } 90 write_to_log_mask(char const * valuename,std::vector<mask_type> const & values)91 void write_to_log_mask(char const* valuename, 92 std::vector<mask_type> const& values) 93 { 94 LTM_(debug) << "topology: " 95 << valuename << "s, size: " //-V128 96 << values.size(); 97 98 std::size_t i = 0; 99 for (mask_cref_type value : values) 100 { 101 LTM_(debug) << "topology: " << valuename //-V128 102 << "(" << i++ << "): " HPX_CPU_MASK_PREFIX 103 << std::hex << value; 104 } 105 } 106 get_index(hwloc_obj_t obj)107 std::size_t get_index(hwloc_obj_t obj) 108 { 109 // on Windows logical_index is always -1 110 if (obj->logical_index == ~0x0u) 111 return static_cast<std::size_t>(obj->os_index); 112 113 return static_cast<std::size_t>(obj->logical_index); 114 } 115 adjust_node_obj(hwloc_obj_t node)116 hwloc_obj_t adjust_node_obj(hwloc_obj_t node) noexcept 117 { 118 #if HWLOC_API_VERSION >= 0x00020000 119 // www.open-mpi.org/projects/hwloc/doc/hwloc-v2.0.0-letter.pdf: 120 // Starting with hwloc v2.0, NUMA nodes are not in the main tree 121 // anymore. They are attached under objects as Memory Children 122 // on the side of normal children. 123 while (hwloc_obj_type_is_memory(node->type)) 124 node = node->parent; 125 HPX_ASSERT(node); 126 #endif 127 return node; 128 } 129 }}} 130 131 namespace hpx { namespace threads 132 { 133 /////////////////////////////////////////////////////////////////////////// operator <<(std::ostream & os,hpx_hwloc_bitmap_wrapper const * bmp)134 std::ostream& operator<<(std::ostream& os, hpx_hwloc_bitmap_wrapper const* bmp) 135 { 136 char buffer[256]; 137 hwloc_bitmap_snprintf(buffer, 256, bmp->bmp_); 138 os << buffer; 139 return os; 140 } 141 142 /////////////////////////////////////////////////////////////////////////// get_service_affinity_mask(mask_cref_type used_processing_units,error_code & ec) const143 mask_type topology::get_service_affinity_mask( 144 mask_cref_type used_processing_units, error_code& ec) const 145 { 146 // We bind the service threads to the first NUMA domain. This is useful 147 // as the first NUMA domain is likely to have the PCI controllers etc. 148 mask_cref_type machine_mask = this->get_numa_node_affinity_mask(0, ec); 149 if (ec || !any(machine_mask)) 150 return mask_type(); 151 152 if (&ec != &throws) 153 ec = make_success_code(); 154 155 mask_type res = ~used_processing_units & machine_mask; 156 157 return (!any(res)) ? machine_mask : res; 158 } 159 reduce_thread_priority(error_code & ec) const160 bool topology::reduce_thread_priority(error_code& ec) const 161 { 162 #ifdef HPX_HAVE_NICE_THREADLEVEL 163 #if defined(__linux__) && !defined(__ANDROID__) && !defined(__bgq__) 164 pid_t tid; 165 tid = syscall(SYS_gettid); 166 if (setpriority(PRIO_PROCESS, tid, 19)) 167 { 168 HPX_THROWS_IF(ec, no_success, "topology::reduce_thread_priority", 169 "setpriority returned an error"); 170 return false; 171 } 172 #elif defined(WIN32) || defined(_WIN32) || defined(__WIN32__) 173 174 if (!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_LOWEST)) 175 { 176 HPX_THROWS_IF(ec, no_success, "topology::reduce_thread_priority", 177 "SetThreadPriority returned an error"); 178 return false; 179 } 180 #elif defined(__bgq__) 181 ThreadPriority_Low(); 182 #endif 183 #endif 184 return true; 185 } 186 187 /////////////////////////////////////////////////////////////////////////// 188 mask_type topology::empty_mask = mask_type(); 189 topology()190 topology::topology() 191 : topo(nullptr), machine_affinity_mask_(0) 192 { // {{{ 193 int err = hwloc_topology_init(&topo); 194 if (err != 0) 195 { 196 HPX_THROW_EXCEPTION(no_success, 197 "topology::topology", 198 "Failed to init hwloc topology"); 199 } 200 201 err = hwloc_topology_load(topo); 202 if (err != 0) 203 { 204 HPX_THROW_EXCEPTION(no_success, 205 "topology::topology", 206 "Failed to load hwloc topology"); 207 } 208 209 init_num_of_pus(); 210 211 socket_numbers_.reserve(num_of_pus_); 212 numa_node_numbers_.reserve(num_of_pus_); 213 core_numbers_.reserve(num_of_pus_); 214 215 // Initialize each set of data entirely, as some of the initialization 216 // routines rely on access to other pieces of topology data. The 217 // compiler will optimize the loops where possible anyways. 218 219 std::size_t num_of_sockets = get_number_of_sockets(); 220 if (num_of_sockets == 0) num_of_sockets = 1; 221 222 for (std::size_t i = 0; i < num_of_pus_; ++i) 223 { 224 std::size_t socket = init_socket_number(i); 225 HPX_ASSERT(socket < num_of_sockets); 226 socket_numbers_.push_back(socket); 227 } 228 229 std::size_t num_of_nodes = get_number_of_numa_nodes(); 230 if (num_of_nodes == 0) num_of_nodes = 1; 231 232 for (std::size_t i = 0; i < num_of_pus_; ++i) 233 { 234 std::size_t numa_node = init_numa_node_number(i); 235 HPX_ASSERT(numa_node < num_of_nodes); 236 numa_node_numbers_.push_back(numa_node); 237 } 238 239 std::size_t num_of_cores = get_number_of_cores(); 240 if (num_of_cores == 0) num_of_cores = 1; 241 242 for (std::size_t i = 0; i < num_of_pus_; ++i) 243 { 244 std::size_t core_number = init_core_number(i); 245 HPX_ASSERT(core_number < num_of_cores); 246 core_numbers_.push_back(core_number); 247 } 248 249 machine_affinity_mask_ = init_machine_affinity_mask(); 250 socket_affinity_masks_.reserve(num_of_pus_); 251 numa_node_affinity_masks_.reserve(num_of_pus_); 252 core_affinity_masks_.reserve(num_of_pus_); 253 thread_affinity_masks_.reserve(num_of_pus_); 254 255 for (std::size_t i = 0; i < num_of_pus_; ++i) 256 { 257 socket_affinity_masks_.push_back(init_socket_affinity_mask(i)); 258 } 259 260 for (std::size_t i = 0; i < num_of_pus_; ++i) 261 { 262 numa_node_affinity_masks_.push_back(init_numa_node_affinity_mask(i)); 263 } 264 265 for (std::size_t i = 0; i < num_of_pus_; ++i) 266 { 267 core_affinity_masks_.push_back(init_core_affinity_mask(i)); 268 } 269 270 for (std::size_t i = 0; i < num_of_pus_; ++i) 271 { 272 thread_affinity_masks_.push_back(init_thread_affinity_mask(i)); 273 } 274 } // }}} 275 write_to_log() const276 void topology::write_to_log() const 277 { 278 std::size_t num_of_sockets = get_number_of_sockets(); 279 if (num_of_sockets == 0) num_of_sockets = 1; 280 detail::write_to_log("num_sockets", num_of_sockets); 281 282 283 std::size_t num_of_nodes = get_number_of_numa_nodes(); 284 if (num_of_nodes == 0) num_of_nodes = 1; 285 detail::write_to_log("num_of_nodes", num_of_nodes); 286 287 std::size_t num_of_cores = get_number_of_cores(); 288 if (num_of_cores == 0) num_of_cores = 1; 289 detail::write_to_log("num_of_cores", num_of_cores); 290 291 detail::write_to_log("num_of_pus", num_of_pus_); 292 293 detail::write_to_log("socket_number", socket_numbers_); 294 detail::write_to_log("numa_node_number", numa_node_numbers_); 295 detail::write_to_log("core_number", core_numbers_); 296 297 detail::write_to_log_mask("machine_affinity_mask", machine_affinity_mask_); 298 299 detail::write_to_log_mask("socket_affinity_mask", socket_affinity_masks_); 300 detail::write_to_log_mask("numa_node_affinity_mask", numa_node_affinity_masks_); 301 detail::write_to_log_mask("core_affinity_mask", core_affinity_masks_); 302 detail::write_to_log_mask("thread_affinity_mask", thread_affinity_masks_); 303 } 304 ~topology()305 topology::~topology() 306 { 307 if (topo) 308 hwloc_topology_destroy(topo); 309 } 310 get_pu_number(std::size_t num_core,std::size_t num_pu,error_code & ec) const311 std::size_t topology::get_pu_number( 312 std::size_t num_core 313 , std::size_t num_pu 314 , error_code& ec 315 ) const 316 { // {{{ 317 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 318 319 int num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); 320 321 // If num_cores is smaller 0, we have an error, it should never be zero 322 // either to avoid division by zero, we should always have at least one 323 // core 324 if(num_cores <= 0) 325 { 326 HPX_THROWS_IF(ec, no_success, 327 "topology::hwloc_get_nobjs_by_type", 328 "Failed to get number of cores"); 329 return std::size_t(-1); 330 } 331 num_core %= num_cores; //-V101 //-V104 //-V107 332 333 hwloc_obj_t core_obj; 334 335 core_obj = hwloc_get_obj_by_type(topo, 336 HWLOC_OBJ_CORE, static_cast<unsigned>(num_core)); 337 338 num_pu %= core_obj->arity; //-V101 //-V104 339 340 return std::size_t(core_obj->children[num_pu]->logical_index); 341 } // }}} 342 343 /////////////////////////////////////////////////////////////////////////// get_machine_affinity_mask(error_code & ec) const344 mask_cref_type topology::get_machine_affinity_mask( 345 error_code& ec 346 ) const 347 { 348 if (&ec != &throws) 349 ec = make_success_code(); 350 351 return machine_affinity_mask_; 352 } 353 get_socket_affinity_mask(std::size_t num_thread,error_code & ec) const354 mask_cref_type topology::get_socket_affinity_mask( 355 std::size_t num_thread 356 , error_code& ec 357 ) const 358 { // {{{ 359 std::size_t num_pu = num_thread % num_of_pus_; 360 361 if (num_pu < socket_affinity_masks_.size()) 362 { 363 if (&ec != &throws) 364 ec = make_success_code(); 365 366 return socket_affinity_masks_[num_pu]; 367 } 368 369 HPX_THROWS_IF(ec, bad_parameter 370 , "hpx::threads::topology::get_socket_affinity_mask" 371 , hpx::util::format( 372 "thread number %1% is out of range", 373 num_thread)); 374 return empty_mask; 375 } // }}} 376 get_numa_node_affinity_mask(std::size_t num_thread,error_code & ec) const377 mask_cref_type topology::get_numa_node_affinity_mask( 378 std::size_t num_thread 379 , error_code& ec 380 ) const 381 { // {{{ 382 std::size_t num_pu = num_thread % num_of_pus_; 383 384 if (num_pu < numa_node_affinity_masks_.size()) 385 { 386 if (&ec != &throws) 387 ec = make_success_code(); 388 389 return numa_node_affinity_masks_[num_pu]; 390 } 391 392 HPX_THROWS_IF(ec, bad_parameter 393 , "hpx::threads::topology::get_numa_node_affinity_mask" 394 , hpx::util::format( 395 "thread number %1% is out of range", 396 num_thread)); 397 return empty_mask; 398 } // }}} 399 get_core_affinity_mask(std::size_t num_thread,error_code & ec) const400 mask_cref_type topology::get_core_affinity_mask( 401 std::size_t num_thread 402 , error_code& ec 403 ) const 404 { 405 std::size_t num_pu = num_thread % num_of_pus_; 406 407 if (num_pu < core_affinity_masks_.size()) 408 { 409 if (&ec != &throws) 410 ec = make_success_code(); 411 412 return core_affinity_masks_[num_pu]; 413 } 414 415 HPX_THROWS_IF(ec, bad_parameter 416 , "hpx::threads::topology::get_core_affinity_mask" 417 , hpx::util::format( 418 "thread number %1% is out of range", 419 num_thread)); 420 return empty_mask; 421 } 422 get_thread_affinity_mask(std::size_t num_thread,error_code & ec) const423 mask_cref_type topology::get_thread_affinity_mask( 424 std::size_t num_thread 425 , error_code& ec 426 ) const 427 { // {{{ 428 std::size_t num_pu = num_thread % num_of_pus_; 429 430 if (num_pu < thread_affinity_masks_.size()) 431 { 432 if (&ec != &throws) 433 ec = make_success_code(); 434 435 return thread_affinity_masks_[num_pu]; 436 } 437 438 HPX_THROWS_IF(ec, bad_parameter 439 , "hpx::threads::topology::get_thread_affinity_mask" 440 , hpx::util::format( 441 "thread number %1% is out of range", 442 num_thread)); 443 return empty_mask; 444 } // }}} 445 446 /////////////////////////////////////////////////////////////////////////// set_thread_affinity_mask(mask_cref_type mask,error_code & ec) const447 void topology::set_thread_affinity_mask( 448 mask_cref_type mask 449 , error_code& ec 450 ) const 451 { // {{{ 452 453 #if !defined(__APPLE__) 454 // setting thread affinities is not supported by OSX 455 hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); 456 457 int const pu_depth = 458 hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU); 459 460 for (std::size_t i = 0; i != mask_size(mask); ++i) 461 { 462 if (test(mask, i)) 463 { 464 hwloc_obj_t const pu_obj = 465 hwloc_get_obj_by_depth(topo, pu_depth, unsigned(i)); 466 HPX_ASSERT(i == detail::get_index(pu_obj)); 467 hwloc_bitmap_set(cpuset, 468 static_cast<unsigned int>(pu_obj->os_index)); 469 } 470 } 471 472 { 473 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 474 if (hwloc_set_cpubind(topo, cpuset, 475 HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_THREAD)) 476 { 477 // Strict binding not supported or failed, try weak binding. 478 if (hwloc_set_cpubind(topo, cpuset, HWLOC_CPUBIND_THREAD)) 479 { 480 boost::scoped_ptr<char> buffer(new char [1024]); 481 482 hwloc_bitmap_snprintf(buffer.get(), 1024, cpuset); 483 hwloc_bitmap_free(cpuset); 484 485 HPX_THROWS_IF(ec, kernel_error 486 , "hpx::threads::topology::set_thread_affinity_mask" 487 , hpx::util::format( 488 "failed to set thread affinity mask (" 489 HPX_CPU_MASK_PREFIX "%x) for cpuset %s", 490 mask, buffer.get())); 491 return; 492 } 493 } 494 } 495 #if defined(__linux) || defined(linux) || defined(__linux__) || defined(__FreeBSD__) 496 sleep(0); // Allow the OS to pick up the change. 497 #endif 498 hwloc_bitmap_free(cpuset); 499 #endif // __APPLE__ 500 501 if (&ec != &throws) 502 ec = make_success_code(); 503 } // }}} 504 505 /////////////////////////////////////////////////////////////////////////// get_thread_affinity_mask_from_lva(naming::address_type lva,error_code & ec) const506 mask_type topology::get_thread_affinity_mask_from_lva( 507 naming::address_type lva 508 , error_code& ec 509 ) const 510 { // {{{ 511 if (&ec != &throws) 512 ec = make_success_code(); 513 514 hwloc_membind_policy_t policy = ::HWLOC_MEMBIND_DEFAULT; 515 hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); 516 517 { 518 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 519 int ret = 520 #if HWLOC_API_VERSION >= 0x00010b06 521 hwloc_get_area_membind(topo, reinterpret_cast<void const*>(lva), 522 1, nodeset, &policy, HWLOC_MEMBIND_BYNODESET); 523 #else 524 hwloc_get_area_membind_nodeset(topo, 525 reinterpret_cast<void const*>(lva), 1, nodeset, &policy, 0); 526 #endif 527 528 if (-1 != ret) 529 { 530 hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); 531 hwloc_cpuset_from_nodeset(topo, cpuset, nodeset); 532 lk.unlock(); 533 534 hwloc_bitmap_free(nodeset); 535 536 mask_type mask = mask_type(); 537 resize(mask, get_number_of_pus()); 538 539 int const pu_depth = 540 hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU); 541 for (unsigned int i = 0; std::size_t(i) != num_of_pus_; ++i) 542 { 543 hwloc_obj_t const pu_obj = 544 hwloc_get_obj_by_depth(topo, pu_depth, i); 545 unsigned idx = static_cast<unsigned>(pu_obj->os_index); 546 if (hwloc_bitmap_isset(cpuset, idx) != 0) 547 set(mask, detail::get_index(pu_obj)); 548 } 549 550 hwloc_bitmap_free(cpuset); 551 return mask; 552 } 553 } 554 555 hwloc_bitmap_free(nodeset); 556 return empty_mask; 557 } // }}} 558 init_numa_node_number(std::size_t num_thread)559 std::size_t topology::init_numa_node_number(std::size_t num_thread) 560 { 561 #if HWLOC_API_VERSION >= 0x00020000 562 if (std::size_t(-1) == num_thread) 563 return std::size_t(-1); 564 565 std::size_t num_pu = (num_thread + pu_offset) % num_of_pus_; 566 567 hwloc_obj_t obj; 568 { 569 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 570 obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, 571 static_cast<unsigned>(num_pu)); 572 HPX_ASSERT(num_pu == detail::get_index(obj)); 573 } 574 575 hwloc_obj_t tmp = nullptr; 576 while ((tmp = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_NUMANODE, tmp)) 577 != nullptr) { 578 if (hwloc_bitmap_intersects(tmp->cpuset, obj->cpuset)) { 579 /* tmp matches, use it */ 580 return tmp->logical_index; 581 } 582 } 583 return 0; 584 #else 585 return init_node_number(num_thread, HWLOC_OBJ_NODE); 586 #endif 587 } 588 init_node_number(std::size_t num_thread,hwloc_obj_type_t type)589 std::size_t topology::init_node_number( 590 std::size_t num_thread, hwloc_obj_type_t type 591 ) 592 { // {{{ 593 if (std::size_t(-1) == num_thread) 594 return std::size_t(-1); 595 596 std::size_t num_pu = (num_thread + pu_offset) % num_of_pus_; 597 598 { 599 hwloc_obj_t obj; 600 601 { 602 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 603 obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, 604 static_cast<unsigned>(num_pu)); 605 HPX_ASSERT(num_pu == detail::get_index(obj)); 606 } 607 608 while (obj) 609 { 610 if (hwloc_compare_types(obj->type, type) == 0) 611 { 612 return detail::get_index(obj); 613 } 614 obj = obj->parent; 615 } 616 } 617 618 return 0; 619 } // }}} 620 extract_node_mask(hwloc_obj_t parent,mask_type & mask) const621 void topology::extract_node_mask( 622 hwloc_obj_t parent 623 , mask_type& mask 624 ) const 625 { // {{{ 626 hwloc_obj_t obj; 627 628 { 629 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 630 obj = hwloc_get_next_child(topo, parent, nullptr); 631 } 632 633 while (obj) 634 { 635 if (hwloc_compare_types(HWLOC_OBJ_PU, obj->type) == 0) 636 { 637 do { 638 set(mask, detail::get_index(obj)); //-V106 639 { 640 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 641 obj = hwloc_get_next_child(topo, parent, obj); 642 } 643 } while (obj != nullptr && 644 hwloc_compare_types(HWLOC_OBJ_PU, obj->type) == 0); 645 return; 646 } 647 648 extract_node_mask(obj, mask); 649 650 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 651 obj = hwloc_get_next_child(topo, parent, obj); 652 } 653 } // }}} 654 extract_node_count(hwloc_obj_t parent,hwloc_obj_type_t type,std::size_t count) const655 std::size_t topology::extract_node_count( 656 hwloc_obj_t parent 657 , hwloc_obj_type_t type 658 , std::size_t count 659 ) const 660 { // {{{ 661 hwloc_obj_t obj; 662 663 if(parent == nullptr) return count; 664 665 { 666 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 667 obj = hwloc_get_next_child(topo, parent, nullptr); 668 } 669 670 while (obj) 671 { 672 if (hwloc_compare_types(type, obj->type) == 0) 673 { 674 /* 675 do { 676 ++count; 677 { 678 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 679 obj = hwloc_get_next_child(topo, parent, obj); 680 } 681 } while (obj != nullptr && hwloc_compare_types(type, obj->type) == 0); 682 return count; 683 */ 684 ++count; 685 } 686 687 count = extract_node_count(obj, type, count); 688 689 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 690 obj = hwloc_get_next_child(topo, parent, obj); 691 } 692 693 return count; 694 } // }}} 695 get_number_of_sockets() const696 std::size_t topology::get_number_of_sockets() const 697 { 698 int nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); 699 if(0 > nobjs) 700 { 701 HPX_THROW_EXCEPTION(kernel_error 702 , "hpx::threads::topology::get_number_of_sockets" 703 , "hwloc_get_nbobjs_by_type failed"); 704 return std::size_t(nobjs); 705 } 706 return std::size_t(nobjs); 707 } 708 get_number_of_numa_nodes() const709 std::size_t topology::get_number_of_numa_nodes() const 710 { 711 int nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE); 712 if(0 > nobjs) 713 { 714 HPX_THROW_EXCEPTION(kernel_error 715 , "hpx::threads::topology::get_number_of_numa_nodes" 716 , "hwloc_get_nbobjs_by_type failed"); 717 return std::size_t(nobjs); 718 } 719 return std::size_t(nobjs); 720 } 721 get_number_of_cores() const722 std::size_t topology::get_number_of_cores() const 723 { 724 int nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); 725 // If num_cores is smaller 0, we have an error 726 if (0 > nobjs) 727 { 728 HPX_THROW_EXCEPTION(kernel_error 729 , "hpx::threads::topology::get_number_of_cores" 730 , "hwloc_get_nbobjs_by_type(HWLOC_OBJ_CORE) failed"); 731 return std::size_t(nobjs); 732 } 733 else if (0 == nobjs) 734 { 735 // some platforms report zero cores but might still report the 736 // number of PUs 737 nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); 738 if (0 > nobjs) 739 { 740 HPX_THROW_EXCEPTION(kernel_error 741 , "hpx::threads::topology::get_number_of_cores" 742 , "hwloc_get_nbobjs_by_type(HWLOC_OBJ_PU) failed"); 743 return std::size_t(nobjs); 744 } 745 } 746 747 // the number of reported cores/pus should never be zero either to 748 // avoid division by zero, we should always have at least one core 749 if (0 == nobjs) 750 { 751 HPX_THROW_EXCEPTION(kernel_error 752 , "hpx::threads::topology::get_number_of_cores" 753 , "hwloc_get_nbobjs_by_type reports zero cores/pus"); 754 return std::size_t(nobjs); 755 } 756 757 return std::size_t(nobjs); 758 } 759 get_number_of_socket_pus(std::size_t num_socket) const760 std::size_t topology::get_number_of_socket_pus( 761 std::size_t num_socket 762 ) const 763 { 764 hwloc_obj_t socket_obj = nullptr; 765 766 { 767 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 768 socket_obj = hwloc_get_obj_by_type(topo, 769 HWLOC_OBJ_SOCKET, static_cast<unsigned>(num_socket)); 770 } 771 772 if (socket_obj) 773 { 774 HPX_ASSERT(num_socket == detail::get_index(socket_obj)); 775 std::size_t pu_count = 0; 776 return extract_node_count(socket_obj, HWLOC_OBJ_PU, pu_count); 777 } 778 779 return num_of_pus_; 780 } 781 get_number_of_numa_node_pus(std::size_t numa_node) const782 std::size_t topology::get_number_of_numa_node_pus( 783 std::size_t numa_node 784 ) const 785 { 786 hwloc_obj_t node_obj = nullptr; 787 788 { 789 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 790 node_obj = hwloc_get_obj_by_type(topo, 791 HWLOC_OBJ_NODE, static_cast<unsigned>(numa_node)); 792 } 793 794 if (node_obj) 795 { 796 HPX_ASSERT(numa_node == detail::get_index(node_obj)); 797 std::size_t pu_count = 0; 798 node_obj = detail::adjust_node_obj(node_obj); 799 return extract_node_count(node_obj, HWLOC_OBJ_PU, pu_count); 800 } 801 802 return num_of_pus_; 803 } 804 get_number_of_core_pus(std::size_t core) const805 std::size_t topology::get_number_of_core_pus( 806 std::size_t core 807 ) const 808 { 809 hwloc_obj_t core_obj = nullptr; 810 811 { 812 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 813 core_obj = hwloc_get_obj_by_type(topo, 814 HWLOC_OBJ_CORE, static_cast<unsigned>(core)); 815 } 816 817 if (core_obj) 818 { 819 HPX_ASSERT(core == detail::get_index(core_obj)); 820 std::size_t pu_count = 0; 821 return extract_node_count(core_obj, HWLOC_OBJ_PU, pu_count); 822 } 823 824 return num_of_pus_; 825 } 826 get_number_of_socket_cores(std::size_t num_socket) const827 std::size_t topology::get_number_of_socket_cores( 828 std::size_t num_socket 829 ) const 830 { 831 hwloc_obj_t socket_obj = nullptr; 832 833 { 834 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 835 socket_obj = hwloc_get_obj_by_type(topo, 836 HWLOC_OBJ_SOCKET, static_cast<unsigned>(num_socket)); 837 } 838 839 if (socket_obj) 840 { 841 HPX_ASSERT(num_socket == detail::get_index(socket_obj)); 842 std::size_t pu_count = 0; 843 return extract_node_count(socket_obj, HWLOC_OBJ_CORE, pu_count); 844 } 845 846 return get_number_of_cores(); 847 } 848 get_number_of_numa_node_cores(std::size_t numa_node) const849 std::size_t topology::get_number_of_numa_node_cores( 850 std::size_t numa_node 851 ) const 852 { 853 hwloc_obj_t node_obj = nullptr; 854 { 855 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 856 node_obj = hwloc_get_obj_by_type(topo, 857 HWLOC_OBJ_NODE, static_cast<unsigned>(numa_node)); 858 } 859 860 if (node_obj) 861 { 862 HPX_ASSERT(numa_node == detail::get_index(node_obj)); 863 std::size_t pu_count = 0; 864 node_obj = detail::adjust_node_obj(node_obj); 865 return extract_node_count(node_obj, HWLOC_OBJ_CORE, pu_count); 866 } 867 868 return get_number_of_cores(); 869 } 870 cpuset_to_nodeset(mask_cref_type mask) const871 hwloc_bitmap_ptr topology::cpuset_to_nodeset( 872 mask_cref_type mask) const 873 { 874 hwloc_bitmap_t cpuset = mask_to_bitmap(mask, HWLOC_OBJ_PU); 875 hwloc_bitmap_t nodeset = hwloc_bitmap_alloc(); 876 #if HWLOC_API_VERSION >= 0x00020000 877 hwloc_cpuset_to_nodeset(topo, cpuset, nodeset); 878 #else 879 hwloc_cpuset_to_nodeset_strict(topo, cpuset, nodeset); 880 #endif 881 hwloc_bitmap_free(cpuset); 882 return std::make_shared<hpx::threads::hpx_hwloc_bitmap_wrapper>(nodeset); 883 } 884 885 namespace detail 886 { print_info(std::ostream & os,hwloc_obj_t obj,char const * name,bool comma)887 void print_info(std::ostream& os, hwloc_obj_t obj, char const* name, 888 bool comma) 889 { 890 if (comma) 891 os << ", "; 892 os << name; 893 894 if (obj->logical_index != ~0x0u) 895 os << "L#" << obj->logical_index; 896 if (obj->os_index != ~0x0u) 897 os << "(P#" << obj->os_index << ")"; 898 } 899 print_info(std::ostream & os,hwloc_obj_t obj,bool comma=false)900 void print_info(std::ostream& os, hwloc_obj_t obj, bool comma = false) 901 { 902 switch (obj->type) { 903 case HWLOC_OBJ_PU: 904 print_info(os, obj, "PU ", comma); 905 break; 906 907 case HWLOC_OBJ_CORE: 908 print_info(os, obj, "Core ", comma); 909 break; 910 911 case HWLOC_OBJ_SOCKET: 912 print_info(os, obj, "Socket ", comma); 913 break; 914 915 case HWLOC_OBJ_NODE: 916 print_info(os, obj, "Node ", comma); 917 break; 918 919 default: 920 break; 921 } 922 } 923 } 924 print_affinity_mask(std::ostream & os,std::size_t num_thread,mask_cref_type m,const std::string & pool_name) const925 void topology::print_affinity_mask(std::ostream& os, 926 std::size_t num_thread, mask_cref_type m, const std::string &pool_name) const 927 { 928 boost::io::ios_flags_saver ifs(os); 929 bool first = true; 930 931 for(std::size_t i = 0; i != num_of_pus_; ++i) 932 { 933 934 hwloc_obj_t obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, unsigned(i)); 935 if (!obj) 936 { 937 HPX_THROW_EXCEPTION(kernel_error 938 , "hpx::threads::topology::print_affinity_mask" 939 , "object not found"); 940 return; 941 } 942 943 if(!test(m, detail::get_index(obj))) //-V106 944 continue; 945 946 if (first) { 947 first = false; 948 os << std::setw(4) << num_thread << ": "; //-V112 //-V128 949 } 950 else { 951 os << " "; 952 } 953 954 detail::print_info(os, obj); 955 956 while(obj->parent) 957 { 958 detail::print_info(os, obj->parent, true); 959 obj = obj->parent; 960 } 961 962 os << ", on pool \"" << pool_name << "\""; 963 964 os << std::endl; 965 } 966 } 967 init_machine_affinity_mask() const968 mask_type topology::init_machine_affinity_mask() const 969 { // {{{ 970 mask_type machine_affinity_mask = mask_type(); 971 resize(machine_affinity_mask, get_number_of_pus()); 972 973 hwloc_obj_t machine_obj; 974 { 975 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 976 machine_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_MACHINE, 0); 977 } 978 if (machine_obj) 979 { 980 extract_node_mask(machine_obj, machine_affinity_mask); 981 return machine_affinity_mask; 982 } 983 984 HPX_THROW_EXCEPTION(kernel_error 985 , "hpx::threads::topology::init_machine_affinity_mask" 986 , "failed to initialize machine affinity mask"); 987 return empty_mask; 988 } // }}} 989 init_socket_affinity_mask_from_socket(std::size_t num_socket) const990 mask_type topology::init_socket_affinity_mask_from_socket( 991 std::size_t num_socket 992 ) const 993 { // {{{ 994 // If we have only one or no socket, the socket affinity mask 995 // spans all processors 996 if (std::size_t(-1) == num_socket) 997 return machine_affinity_mask_; 998 999 hwloc_obj_t socket_obj = nullptr; 1000 { 1001 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1002 socket_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 1003 static_cast<unsigned>(num_socket)); 1004 } 1005 1006 if (socket_obj) 1007 { 1008 HPX_ASSERT(num_socket == detail::get_index(socket_obj)); 1009 1010 mask_type socket_affinity_mask = mask_type(); 1011 resize(socket_affinity_mask, get_number_of_pus()); 1012 1013 extract_node_mask(socket_obj, socket_affinity_mask); 1014 return socket_affinity_mask; 1015 } 1016 1017 return machine_affinity_mask_; 1018 } // }}} 1019 init_numa_node_affinity_mask_from_numa_node(std::size_t numa_node) const1020 mask_type topology::init_numa_node_affinity_mask_from_numa_node( 1021 std::size_t numa_node 1022 ) const 1023 { // {{{ 1024 // If we have only one or no NUMA domain, the NUMA affinity mask 1025 // spans all processors 1026 if (std::size_t(-1) == numa_node) 1027 { 1028 return machine_affinity_mask_; 1029 } 1030 1031 hwloc_obj_t numa_node_obj = nullptr; 1032 { 1033 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1034 numa_node_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_NODE, 1035 static_cast<unsigned>(numa_node)); 1036 } 1037 1038 if (numa_node_obj) 1039 { 1040 HPX_ASSERT(numa_node == detail::get_index(numa_node_obj)); 1041 mask_type node_affinity_mask = mask_type(); 1042 resize(node_affinity_mask, get_number_of_pus()); 1043 1044 numa_node_obj = detail::adjust_node_obj(numa_node_obj); 1045 extract_node_mask(numa_node_obj, node_affinity_mask); 1046 return node_affinity_mask; 1047 } 1048 1049 return machine_affinity_mask_; 1050 } // }}} 1051 init_core_affinity_mask_from_core(std::size_t core,mask_cref_type default_mask) const1052 mask_type topology::init_core_affinity_mask_from_core( 1053 std::size_t core, mask_cref_type default_mask 1054 ) const 1055 { // {{{ 1056 if (std::size_t(-1) == core) 1057 return default_mask; 1058 1059 hwloc_obj_t core_obj = nullptr; 1060 1061 std::size_t num_core = (core + core_offset) % get_number_of_cores(); 1062 1063 { 1064 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1065 core_obj = hwloc_get_obj_by_type(topo, 1066 HWLOC_OBJ_CORE, static_cast<unsigned>(num_core)); 1067 } 1068 1069 if (core_obj) 1070 { 1071 HPX_ASSERT(num_core == detail::get_index(core_obj)); 1072 mask_type core_affinity_mask = mask_type(); 1073 resize(core_affinity_mask, get_number_of_pus()); 1074 1075 extract_node_mask(core_obj, core_affinity_mask); 1076 return core_affinity_mask; 1077 } 1078 1079 return default_mask; 1080 } // }}} 1081 init_thread_affinity_mask(std::size_t num_thread) const1082 mask_type topology::init_thread_affinity_mask( 1083 std::size_t num_thread 1084 ) const 1085 { // {{{ 1086 1087 if (std::size_t(-1) == num_thread) 1088 { 1089 return get_core_affinity_mask(num_thread); 1090 } 1091 1092 std::size_t num_pu = (num_thread + pu_offset) % num_of_pus_; 1093 1094 hwloc_obj_t obj = nullptr; 1095 1096 { 1097 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1098 obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, 1099 static_cast<unsigned>(num_pu)); 1100 } 1101 1102 if (!obj) 1103 { 1104 return get_core_affinity_mask(num_thread); 1105 } 1106 1107 HPX_ASSERT(num_pu == detail::get_index(obj)); 1108 mask_type mask = mask_type(); 1109 resize(mask, get_number_of_pus()); 1110 1111 set(mask, detail::get_index(obj)); //-V106 1112 1113 return mask; 1114 } // }}} 1115 init_thread_affinity_mask(std::size_t num_core,std::size_t num_pu) const1116 mask_type topology::init_thread_affinity_mask( 1117 std::size_t num_core, 1118 std::size_t num_pu 1119 ) const 1120 { // {{{ 1121 hwloc_obj_t obj = nullptr; 1122 1123 { 1124 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1125 int num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); 1126 // If num_cores is smaller 0, we have an error, it should never be zero 1127 // either to avoid division by zero, we should always have at least one 1128 // core 1129 if (num_cores <= 0) { 1130 HPX_THROW_EXCEPTION(kernel_error 1131 , "hpx::threads::topology::init_thread_affinity_mask" 1132 , "hwloc_get_nbobjs_by_type failed"); 1133 return empty_mask; 1134 } 1135 1136 num_core = (num_core + core_offset) % std::size_t(num_cores); 1137 obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 1138 static_cast<unsigned>(num_core)); 1139 } 1140 1141 if (!obj) 1142 return empty_mask;//get_core_affinity_mask(num_thread, false); 1143 1144 HPX_ASSERT(num_core == detail::get_index(obj)); 1145 1146 num_pu %= obj->arity; //-V101 //-V104 1147 1148 mask_type mask = mask_type(); 1149 resize(mask, get_number_of_pus()); 1150 1151 set(mask, detail::get_index(obj->children[num_pu])); //-V106 1152 1153 return mask; 1154 } // }}} 1155 1156 /////////////////////////////////////////////////////////////////////////// init_num_of_pus()1157 void topology::init_num_of_pus() 1158 { 1159 num_of_pus_ = 1; 1160 { 1161 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1162 int num_of_pus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); 1163 1164 if (num_of_pus > 0) 1165 { 1166 num_of_pus_ = static_cast<std::size_t>(num_of_pus); 1167 } 1168 } 1169 } 1170 get_number_of_pus() const1171 std::size_t topology::get_number_of_pus() const 1172 { 1173 return num_of_pus_; 1174 } 1175 1176 /////////////////////////////////////////////////////////////////////////// get_cpubind_mask(error_code & ec) const1177 mask_type topology::get_cpubind_mask(error_code& ec) const 1178 { 1179 hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); 1180 1181 mask_type mask = mask_type(); 1182 resize(mask, get_number_of_pus()); 1183 1184 { 1185 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1186 if (hwloc_get_cpubind(topo, cpuset, HWLOC_CPUBIND_THREAD)) 1187 { 1188 hwloc_bitmap_free(cpuset); 1189 HPX_THROWS_IF(ec, kernel_error 1190 , "hpx::threads::topology::get_cpubind_mask" 1191 , "hwloc_get_cpubind failed"); 1192 return empty_mask; 1193 } 1194 1195 int const pu_depth = hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU); 1196 for (unsigned int i = 0; i != num_of_pus_; ++i) //-V104 1197 { 1198 hwloc_obj_t const pu_obj = hwloc_get_obj_by_depth(topo, pu_depth, i); 1199 unsigned idx = static_cast<unsigned>(pu_obj->os_index); 1200 if (hwloc_bitmap_isset(cpuset, idx) != 0) 1201 set(mask, detail::get_index(pu_obj)); 1202 } 1203 } 1204 1205 hwloc_bitmap_free(cpuset); 1206 1207 if (&ec != &throws) 1208 ec = make_success_code(); 1209 1210 return mask; 1211 } 1212 get_cpubind_mask(compat::thread & handle,error_code & ec) const1213 mask_type topology::get_cpubind_mask(compat::thread& handle, 1214 error_code& ec) const 1215 { 1216 hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); 1217 1218 mask_type mask = mask_type(); 1219 resize(mask, get_number_of_pus()); 1220 1221 { 1222 std::unique_lock<hpx::util::spinlock> lk(topo_mtx); 1223 #if defined(HPX_MINGW) 1224 if (hwloc_get_thread_cpubind(topo, 1225 pthread_gethandle(handle.native_handle()), cpuset, 1226 HWLOC_CPUBIND_THREAD)) 1227 #else 1228 if (hwloc_get_thread_cpubind(topo, handle.native_handle(), cpuset, 1229 HWLOC_CPUBIND_THREAD)) 1230 #endif 1231 { 1232 hwloc_bitmap_free(cpuset); 1233 HPX_THROWS_IF(ec, kernel_error 1234 , "hpx::threads::topology::get_cpubind_mask" 1235 , "hwloc_get_cpubind failed"); 1236 return empty_mask; 1237 } 1238 1239 int const pu_depth = hwloc_get_type_or_below_depth(topo, HWLOC_OBJ_PU); 1240 for (unsigned int i = 0; i != num_of_pus_; ++i) //-V104 1241 { 1242 hwloc_obj_t const pu_obj = 1243 hwloc_get_obj_by_depth(topo, pu_depth, i); 1244 unsigned idx = static_cast<unsigned>(pu_obj->os_index); 1245 if (hwloc_bitmap_isset(cpuset, idx) != 0) 1246 set(mask, detail::get_index(pu_obj)); 1247 } 1248 } 1249 1250 hwloc_bitmap_free(cpuset); 1251 1252 if (&ec != &throws) 1253 ec = make_success_code(); 1254 1255 return mask; 1256 } 1257 1258 1259 /////////////////////////////////////////////////////////////////////////// 1260 /// This is equivalent to malloc(), except that it tries to allocate 1261 /// page-aligned memory from the OS. allocate(std::size_t len) const1262 void* topology::allocate(std::size_t len) const 1263 { 1264 return hwloc_alloc(topo, len); 1265 } 1266 1267 /////////////////////////////////////////////////////////////////////////// 1268 /// Allocate some memory on NUMA memory nodes specified by nodeset 1269 /// as specified by the hwloc hwloc_alloc_membind_nodeset call allocate_membind(std::size_t len,hwloc_bitmap_ptr bitmap,hpx_hwloc_membind_policy policy,int flags) const1270 void* topology::allocate_membind(std::size_t len, 1271 hwloc_bitmap_ptr bitmap, 1272 hpx_hwloc_membind_policy policy, int flags) const 1273 { 1274 return 1275 #if HWLOC_API_VERSION >= 0x00010b06 1276 hwloc_alloc_membind(topo, len, bitmap->get_bmp(), 1277 (hwloc_membind_policy_t)(policy), 1278 flags | HWLOC_MEMBIND_BYNODESET); 1279 #else 1280 hwloc_alloc_membind_nodeset(topo, len, bitmap->get_bmp(), 1281 (hwloc_membind_policy_t)(policy), flags); 1282 #endif 1283 } 1284 set_area_membind_nodeset(const void * addr,std::size_t len,void * nodeset) const1285 bool topology::set_area_membind_nodeset( 1286 const void *addr, std::size_t len, void *nodeset) const 1287 { 1288 #if !defined(__APPLE__) 1289 hwloc_membind_policy_t policy = ::HWLOC_MEMBIND_BIND; 1290 hwloc_nodeset_t ns = reinterpret_cast<hwloc_nodeset_t>(nodeset); 1291 int ret = 1292 #if HWLOC_API_VERSION >= 0x00010b06 1293 hwloc_set_area_membind( 1294 topo, addr, len, ns, policy, HWLOC_MEMBIND_BYNODESET); 1295 #else 1296 hwloc_set_area_membind_nodeset(topo, addr, len, ns, policy, 0); 1297 #endif 1298 1299 if (ret < 0) 1300 { 1301 std::string msg = std::strerror(errno); 1302 if (errno == ENOSYS) msg = "the action is not supported"; 1303 if (errno == EXDEV) msg = "the binding cannot be enforced"; 1304 HPX_THROW_EXCEPTION(kernel_error 1305 , "hpx::threads::topology::set_area_membind_nodeset" 1306 , "hwloc_set_area_membind_nodeset failed : " + msg); 1307 return false; 1308 } 1309 #endif 1310 return true; 1311 } 1312 1313 util::thread_specific_ptr<hpx_hwloc_bitmap_wrapper, topology::tls_tag> 1314 topology::bitmap_storage_; 1315 get_area_membind_nodeset(const void * addr,std::size_t len) const1316 threads::mask_type topology::get_area_membind_nodeset( 1317 const void *addr, std::size_t len) const 1318 { 1319 hpx_hwloc_bitmap_wrapper *nodeset = topology::bitmap_storage_.get(); 1320 if (nullptr == nodeset) 1321 { 1322 hwloc_bitmap_t nodeset_ = hwloc_bitmap_alloc(); 1323 topology::bitmap_storage_.reset(new hpx_hwloc_bitmap_wrapper(nodeset_)); 1324 nodeset = topology::bitmap_storage_.get(); 1325 } 1326 // 1327 hwloc_membind_policy_t policy; 1328 hwloc_nodeset_t ns = reinterpret_cast<hwloc_nodeset_t>(nodeset->get_bmp()); 1329 1330 if ( 1331 #if HWLOC_API_VERSION >= 0x00010b06 1332 hwloc_get_area_membind( 1333 topo, addr, len, ns, &policy, HWLOC_MEMBIND_BYNODESET) 1334 #else 1335 hwloc_get_area_membind_nodeset(topo, addr, len, ns, &policy, 0) 1336 #endif 1337 == -1) 1338 { 1339 HPX_THROW_EXCEPTION(kernel_error, 1340 "hpx::threads::topology::get_area_membind_nodeset", 1341 "hwloc_get_area_membind_nodeset failed"); 1342 return -1; 1343 std::cout << "error in "; 1344 } 1345 return bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE); 1346 } 1347 get_numa_domain(const void * addr) const1348 int topology::get_numa_domain(const void *addr) const 1349 { 1350 #if HWLOC_API_VERSION >= 0x00010b06 1351 hpx_hwloc_bitmap_wrapper *nodeset = topology::bitmap_storage_.get(); 1352 if (nullptr == nodeset) 1353 { 1354 hwloc_bitmap_t nodeset_ = hwloc_bitmap_alloc(); 1355 topology::bitmap_storage_.reset(new hpx_hwloc_bitmap_wrapper(nodeset_)); 1356 nodeset = topology::bitmap_storage_.get(); 1357 } 1358 // 1359 hwloc_nodeset_t ns = reinterpret_cast<hwloc_nodeset_t>(nodeset->get_bmp()); 1360 1361 int ret = hwloc_get_area_memlocation(topo, addr, 1, ns, 1362 HWLOC_MEMBIND_BYNODESET); 1363 if (ret<0) { 1364 std::string msg(strerror(errno)); 1365 HPX_THROW_EXCEPTION(kernel_error 1366 , "hpx::threads::topology::get_numa_domain" 1367 , "hwloc_get_area_memlocation failed " + msg); 1368 return -1; 1369 } 1370 threads::mask_type mask = bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE); 1371 return static_cast<int>(threads::find_first(mask)); 1372 #else 1373 return 0; 1374 #endif 1375 } 1376 1377 /// Free memory that was previously allocated by allocate deallocate(void * addr,std::size_t len) const1378 void topology::deallocate(void* addr, std::size_t len) const 1379 { 1380 hwloc_free(topo, addr, len); 1381 } 1382 1383 /////////////////////////////////////////////////////////////////////////// mask_to_bitmap(mask_cref_type mask,hwloc_obj_type_t htype) const1384 hwloc_bitmap_t topology::mask_to_bitmap(mask_cref_type mask, 1385 hwloc_obj_type_t htype) const 1386 { 1387 hwloc_bitmap_t bitmap = hwloc_bitmap_alloc(); 1388 hwloc_bitmap_zero(bitmap); 1389 // 1390 int const depth = 1391 hwloc_get_type_or_below_depth(topo, htype); 1392 1393 for (std::size_t i = 0; i != mask_size(mask); ++i) { 1394 if (test(mask, i)) { 1395 hwloc_obj_t const hw_obj = 1396 hwloc_get_obj_by_depth(topo, depth, unsigned(i)); 1397 HPX_ASSERT(i == detail::get_index(hw_obj)); 1398 hwloc_bitmap_set(bitmap, 1399 static_cast<unsigned int>(hw_obj->os_index)); 1400 } 1401 } 1402 return bitmap; 1403 } 1404 1405 /////////////////////////////////////////////////////////////////////////// bitmap_to_mask(hwloc_bitmap_t bitmap,hwloc_obj_type_t htype) const1406 mask_type topology::bitmap_to_mask(hwloc_bitmap_t bitmap, 1407 hwloc_obj_type_t htype) const 1408 { 1409 mask_type mask = mask_type(); 1410 std::size_t num = hwloc_get_nbobjs_by_type(topo, htype); 1411 // 1412 int const pu_depth = hwloc_get_type_or_below_depth(topo, htype); 1413 for (unsigned int i=0; std::size_t(i)!=num; ++i) //-V104 1414 { 1415 hwloc_obj_t const pu_obj = 1416 hwloc_get_obj_by_depth(topo, pu_depth, i); 1417 unsigned idx = static_cast<unsigned>(pu_obj->os_index); 1418 if (hwloc_bitmap_isset(bitmap, idx) != 0) 1419 set(mask, detail::get_index(pu_obj)); 1420 } 1421 return mask; 1422 } 1423 1424 /////////////////////////////////////////////////////////////////////////// print_mask_vector(std::ostream & os,std::vector<mask_type> const & v) const1425 void topology::print_mask_vector(std::ostream& os, 1426 std::vector<mask_type> const& v) const 1427 { 1428 std::size_t s = v.size(); 1429 if (s == 0) 1430 { 1431 os << "(empty)\n"; 1432 return; 1433 } 1434 1435 for (std::size_t i = 0; i != s; i++) 1436 { 1437 os << std::hex << HPX_CPU_MASK_PREFIX << v[i] << "\n"; 1438 } 1439 os << "\n"; 1440 } 1441 print_vector(std::ostream & os,std::vector<std::size_t> const & v) const1442 void topology::print_vector( 1443 std::ostream& os, std::vector<std::size_t> const& v) const 1444 { 1445 std::size_t s = v.size(); 1446 if (s == 0) 1447 { 1448 os << "(empty)\n"; 1449 return; 1450 } 1451 1452 os << v[0]; 1453 for (std::size_t i = 1; i != s; i++) 1454 { 1455 os << ", " << std::dec << v[i]; 1456 } 1457 os << "\n"; 1458 } 1459 print_hwloc(std::ostream & os) const1460 void topology::print_hwloc(std::ostream& os) const 1461 { 1462 os << "[HWLOC topology info] number of ...\n" << std::dec 1463 << "number of sockets : " << get_number_of_sockets() 1464 << "\n" 1465 << "number of numa nodes : " << get_number_of_numa_nodes() 1466 << "\n" 1467 << "number of cores : " << get_number_of_cores() << "\n" 1468 << "number of PUs : " << get_number_of_pus() << "\n" 1469 << "hardware concurrency : " 1470 << hpx::threads::hardware_concurrency() << "\n" << std::endl; 1471 //! -------------------------------------- topology (affinity masks) 1472 os << "[HWLOC topology info] affinity masks :\n" 1473 << "machine : \n" 1474 << std::hex << HPX_CPU_MASK_PREFIX 1475 << machine_affinity_mask_ << "\n"; 1476 1477 os << "socket : \n"; 1478 print_mask_vector(os, socket_affinity_masks_); 1479 os << "numa node : \n"; 1480 print_mask_vector(os, numa_node_affinity_masks_); 1481 os << "core : \n"; 1482 print_mask_vector(os, core_affinity_masks_); 1483 os << "PUs (/threads) : \n"; 1484 print_mask_vector(os, thread_affinity_masks_); 1485 1486 //! -------------------------------------- topology (numbers) 1487 os << "[HWLOC topology info] resource numbers :\n"; 1488 os << "socket : \n"; 1489 print_vector(os, socket_numbers_); 1490 os << "numa node : \n"; 1491 print_vector(os, numa_node_numbers_); 1492 os << "core : \n"; 1493 print_vector(os, core_numbers_); 1494 //os << "PUs (/threads) : \n"; 1495 //print_vector(os, pu_numbers_); 1496 } 1497 get_topology()1498 topology const& get_topology() 1499 { 1500 hpx::runtime* rt = hpx::get_runtime_ptr(); 1501 if (rt == nullptr) 1502 { 1503 HPX_THROW_EXCEPTION(invalid_status, "hpx::threads::get_topology", 1504 "the hpx runtime system has not been initialized yet"); 1505 } 1506 return rt->get_topology(); 1507 } 1508 1509 /////////////////////////////////////////////////////////////////////////// 1510 struct hardware_concurrency_tag {}; 1511 1512 struct hw_concurrency 1513 { hw_concurrencyhpx::threads::hw_concurrency1514 hw_concurrency() 1515 #if defined(__ANDROID__) && defined(ANDROID) 1516 : num_of_cores_(::android_getCpuCount()) 1517 #else 1518 : num_of_cores_(detail::hwloc_hardware_concurrency()) 1519 #endif 1520 { 1521 if (num_of_cores_ == 0) 1522 num_of_cores_ = 1; 1523 } 1524 1525 std::size_t num_of_cores_; 1526 }; 1527 hardware_concurrency()1528 std::size_t hardware_concurrency() 1529 { 1530 util::static_<hw_concurrency, hardware_concurrency_tag> hwc; 1531 return hwc.get().num_of_cores_; 1532 } 1533 }} 1534 1535