1 // Copyright (c) 2007-2012 Hartmut Kaiser 2 // Copyright (c) 2013 Thomas Heller 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 7 #include <hpx/exception.hpp> 8 #include <hpx/config/asio.hpp> 9 10 #include <hpx/runtime/threads/topology.hpp> 11 #include <hpx/util/asio_util.hpp> 12 #include <hpx/util/batch_environment.hpp> 13 #include <hpx/util/runtime_configuration.hpp> 14 15 #include <hpx/util/batch_environments/alps_environment.hpp> 16 #include <hpx/util/batch_environments/slurm_environment.hpp> 17 #include <hpx/util/batch_environments/pbs_environment.hpp> 18 19 #include <cstddef> 20 #include <iostream> 21 #include <string> 22 #include <utility> 23 #include <vector> 24 25 #include <boost/asio/io_service.hpp> 26 #include <boost/asio/ip/host_name.hpp> 27 28 namespace hpx { namespace util 29 { batch_environment(std::vector<std::string> & nodelist,util::runtime_configuration const & cfg,bool debug,bool enable)30 batch_environment::batch_environment(std::vector<std::string> & nodelist, 31 util::runtime_configuration const& cfg, bool debug, bool enable) 32 : agas_node_num_(0) 33 , node_num_(-1) 34 , num_threads_(-1) 35 , num_localities_(-1) 36 , debug_(debug) 37 { 38 if (!enable) 39 return; 40 41 batch_environments::alps_environment alps_env(nodelist, debug); 42 if(alps_env.valid()) 43 { 44 batch_name_ = "ALPS"; 45 num_threads_ = alps_env.num_threads(); 46 node_num_ = alps_env.node_num(); 47 return; 48 } 49 batch_environments::slurm_environment slurm_env(nodelist, debug); 50 if(slurm_env.valid()) 51 { 52 batch_name_ = "SLURM"; 53 num_threads_ = slurm_env.num_threads(); 54 num_localities_ = slurm_env.num_localities(); 55 node_num_ = slurm_env.node_num(); 56 return; 57 } 58 batch_environments::pbs_environment pbs_env(nodelist, debug, cfg); 59 if(pbs_env.valid()) 60 { 61 batch_name_ = "PBS"; 62 num_threads_ = pbs_env.num_threads(); 63 num_localities_ = pbs_env.num_localities(); 64 node_num_ = pbs_env.node_num(); 65 return; 66 } 67 } 68 69 // This function returns true if a batch environment was found. found_batch_environment() const70 bool batch_environment::found_batch_environment() const 71 { 72 return !batch_name_.empty(); 73 } 74 75 // this function initializes the map of nodes from the given a list of nodes init_from_nodelist(std::vector<std::string> const & nodes,std::string const & agas_host)76 std::string batch_environment::init_from_nodelist( 77 std::vector<std::string> const& nodes, 78 std::string const& agas_host) 79 { 80 if (debug_) 81 std::cerr << "got node list" << std::endl; 82 83 boost::asio::io_service io_service; 84 85 bool found_agas_host = false; 86 std::size_t agas_node_num = 0; 87 std::string nodes_list; 88 for (std::string s : nodes) 89 { 90 if (!s.empty()) { 91 if (debug_) 92 std::cerr << "extracted: '" << s << "'" << std::endl; 93 94 boost::asio::ip::tcp::endpoint ep = 95 util::resolve_hostname(s, 0, io_service); 96 97 if (!found_agas_host && 98 ((agas_host.empty() && nodes_.empty()) || s == agas_host)) 99 { 100 agas_node_ = s; 101 found_agas_host = true; 102 agas_node_num_ = agas_node_num; 103 } 104 105 if (0 == nodes_.count(ep)) { 106 if (debug_) 107 std::cerr << "incrementing agas_node_num" << std::endl; 108 ++agas_node_num; 109 } 110 111 std::pair<std::string, std::size_t>& data = nodes_[ep]; 112 if (data.first.empty()) 113 data.first = s; 114 ++data.second; 115 116 nodes_list += s + ' '; 117 } 118 } 119 120 // if an AGAS host is specified, it needs to be in the list 121 // of nodes participating in this run 122 if (!agas_host.empty() && !found_agas_host) { 123 throw hpx::detail::command_line_error("Requested AGAS host (" + agas_host + 124 ") not found in node list"); 125 } 126 127 if (debug_) { 128 if (!agas_node_.empty()) { 129 std::cerr << "using AGAS host: '" << agas_node_ 130 << "' (node number " << agas_node_num_ << ")" << std::endl; 131 } 132 133 std::cerr << "Nodes from nodelist:" << std::endl; 134 node_map_type::const_iterator end = nodes_.end(); 135 for (node_map_type::const_iterator it = nodes_.begin(); 136 it != end; ++it) 137 { 138 std::cerr << (*it).second.first << ": " 139 << (*it).second.second << " (" << (*it).first << ")" 140 << std::endl; 141 } 142 } 143 return nodes_list; 144 } 145 146 // The number of threads is either one (if no PBS/SLURM information was 147 // found), or it is the same as the number of times this node has 148 // been listed in the node file. Additionally this takes into account 149 // the number of tasks run on this node. retrieve_number_of_threads() const150 std::size_t batch_environment::retrieve_number_of_threads() const 151 { 152 return num_threads_; 153 } 154 155 // The number of localities is either one (if no PBS information 156 // was found), or it is the same as the number of distinct node 157 // names listed in the node file. In case of SLURM we can extract 158 // the number of localities from the job environment. retrieve_number_of_localities() const159 std::size_t batch_environment::retrieve_number_of_localities() const 160 { 161 return num_localities_; 162 } 163 164 // Try to retrieve the node number from the PBS/SLURM environment retrieve_node_number() const165 std::size_t batch_environment::retrieve_node_number() const 166 { 167 return node_num_; 168 } 169 host_name() const170 std::string batch_environment::host_name() const 171 { 172 std::string hostname = boost::asio::ip::host_name(); 173 if (debug_) 174 std::cerr << "asio host_name: " << hostname << std::endl; 175 return hostname; 176 } 177 host_name(std::string const & def_hpx_name) const178 std::string batch_environment::host_name(std::string const& def_hpx_name) const 179 { 180 std::string host = nodes_.empty() ? def_hpx_name : host_name(); 181 if (debug_) 182 std::cerr << "host_name: " << host << std::endl; 183 return host; 184 } 185 186 // We either select the first host listed in the node file or a given 187 // host name to host the AGAS server. agas_host_name(std::string const & def_agas) const188 std::string batch_environment::agas_host_name(std::string const& def_agas) const 189 { 190 std::string host = agas_node_.empty() ? def_agas : agas_node_; 191 if (debug_) 192 std::cerr << "agas host_name: " << host << std::endl; 193 return host; 194 } 195 196 // Return a string containing the name of the batch system get_batch_name() const197 std::string batch_environment::get_batch_name() const 198 { 199 return batch_name_; 200 } 201 }} 202 203