1 //  Copyright (c) 2007-2012 Hartmut Kaiser
2 //  Copyright (c)      2013 Thomas Heller
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
5 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 
7 #include <hpx/exception.hpp>
8 #include <hpx/config/asio.hpp>
9 
10 #include <hpx/runtime/threads/topology.hpp>
11 #include <hpx/util/asio_util.hpp>
12 #include <hpx/util/batch_environment.hpp>
13 #include <hpx/util/runtime_configuration.hpp>
14 
15 #include <hpx/util/batch_environments/alps_environment.hpp>
16 #include <hpx/util/batch_environments/slurm_environment.hpp>
17 #include <hpx/util/batch_environments/pbs_environment.hpp>
18 
19 #include <cstddef>
20 #include <iostream>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include <boost/asio/io_service.hpp>
26 #include <boost/asio/ip/host_name.hpp>
27 
28 namespace hpx { namespace util
29 {
batch_environment(std::vector<std::string> & nodelist,util::runtime_configuration const & cfg,bool debug,bool enable)30     batch_environment::batch_environment(std::vector<std::string> & nodelist,
31             util::runtime_configuration const& cfg, bool debug, bool enable)
32       : agas_node_num_(0)
33       , node_num_(-1)
34       , num_threads_(-1)
35       , num_localities_(-1)
36       , debug_(debug)
37     {
38         if (!enable)
39             return;
40 
41         batch_environments::alps_environment alps_env(nodelist, debug);
42         if(alps_env.valid())
43         {
44             batch_name_ = "ALPS";
45             num_threads_ = alps_env.num_threads();
46             node_num_ = alps_env.node_num();
47             return;
48         }
49         batch_environments::slurm_environment slurm_env(nodelist, debug);
50         if(slurm_env.valid())
51         {
52             batch_name_ = "SLURM";
53             num_threads_ = slurm_env.num_threads();
54             num_localities_ = slurm_env.num_localities();
55             node_num_ = slurm_env.node_num();
56             return;
57         }
58         batch_environments::pbs_environment pbs_env(nodelist, debug, cfg);
59         if(pbs_env.valid())
60         {
61             batch_name_ = "PBS";
62             num_threads_ = pbs_env.num_threads();
63             num_localities_ = pbs_env.num_localities();
64             node_num_ = pbs_env.node_num();
65             return;
66         }
67     }
68 
69     // This function returns true if a batch environment was found.
found_batch_environment() const70     bool batch_environment::found_batch_environment() const
71     {
72         return !batch_name_.empty();
73     }
74 
75     // this function initializes the map of nodes from the given a list of nodes
init_from_nodelist(std::vector<std::string> const & nodes,std::string const & agas_host)76     std::string batch_environment::init_from_nodelist(
77         std::vector<std::string> const& nodes,
78         std::string const& agas_host)
79     {
80         if (debug_)
81             std::cerr << "got node list" << std::endl;
82 
83         boost::asio::io_service io_service;
84 
85         bool found_agas_host = false;
86         std::size_t agas_node_num = 0;
87         std::string nodes_list;
88         for (std::string s : nodes)
89         {
90             if (!s.empty()) {
91                 if (debug_)
92                     std::cerr << "extracted: '" << s << "'" << std::endl;
93 
94                 boost::asio::ip::tcp::endpoint ep =
95                     util::resolve_hostname(s, 0, io_service);
96 
97                 if (!found_agas_host &&
98                     ((agas_host.empty() && nodes_.empty()) || s == agas_host))
99                 {
100                     agas_node_ = s;
101                     found_agas_host = true;
102                     agas_node_num_ = agas_node_num;
103                 }
104 
105                 if (0 == nodes_.count(ep)) {
106                     if (debug_)
107                         std::cerr << "incrementing agas_node_num" << std::endl;
108                     ++agas_node_num;
109                 }
110 
111                 std::pair<std::string, std::size_t>& data = nodes_[ep];
112                 if (data.first.empty())
113                     data.first = s;
114                 ++data.second;
115 
116                 nodes_list += s + ' ';
117             }
118         }
119 
120         // if an AGAS host is specified, it needs to be in the list
121         // of nodes participating in this run
122         if (!agas_host.empty() && !found_agas_host) {
123             throw hpx::detail::command_line_error("Requested AGAS host (" + agas_host +
124                 ") not found in node list");
125         }
126 
127         if (debug_) {
128             if (!agas_node_.empty()) {
129                 std::cerr << "using AGAS host: '" << agas_node_
130                     << "' (node number " << agas_node_num_ << ")" << std::endl;
131             }
132 
133             std::cerr << "Nodes from nodelist:" << std::endl;
134             node_map_type::const_iterator end = nodes_.end();
135             for (node_map_type::const_iterator it = nodes_.begin();
136                  it != end; ++it)
137             {
138                 std::cerr << (*it).second.first << ": "
139                     << (*it).second.second << " (" << (*it).first << ")"
140                     << std::endl;
141             }
142         }
143         return nodes_list;
144     }
145 
146     // The number of threads is either one (if no PBS/SLURM information was
147     // found), or it is the same as the number of times this node has
148     // been listed in the node file. Additionally this takes into account
149     // the number of tasks run on this node.
retrieve_number_of_threads() const150     std::size_t batch_environment::retrieve_number_of_threads() const
151     {
152         return num_threads_;
153     }
154 
155     // The number of localities is either one (if no PBS information
156     // was found), or it is the same as the number of distinct node
157     // names listed in the node file. In case of SLURM we can extract
158     // the number of localities from the job environment.
retrieve_number_of_localities() const159     std::size_t batch_environment::retrieve_number_of_localities() const
160     {
161         return num_localities_;
162     }
163 
164     // Try to retrieve the node number from the PBS/SLURM environment
retrieve_node_number() const165     std::size_t batch_environment::retrieve_node_number() const
166     {
167         return node_num_;
168     }
169 
host_name() const170     std::string batch_environment::host_name() const
171     {
172         std::string hostname = boost::asio::ip::host_name();
173         if (debug_)
174             std::cerr << "asio host_name: " << hostname << std::endl;
175         return hostname;
176     }
177 
host_name(std::string const & def_hpx_name) const178     std::string batch_environment::host_name(std::string const& def_hpx_name) const
179     {
180         std::string host = nodes_.empty() ? def_hpx_name : host_name();
181         if (debug_)
182             std::cerr << "host_name: " << host << std::endl;
183         return host;
184     }
185 
186     // We either select the first host listed in the node file or a given
187     // host name to host the AGAS server.
agas_host_name(std::string const & def_agas) const188     std::string batch_environment::agas_host_name(std::string const& def_agas) const
189     {
190         std::string host = agas_node_.empty() ? def_agas : agas_node_;
191         if (debug_)
192             std::cerr << "agas host_name: " << host << std::endl;
193         return host;
194     }
195 
196     // Return a string containing the name of the batch system
get_batch_name() const197     std::string batch_environment::get_batch_name() const
198     {
199         return batch_name_;
200     }
201 }}
202 
203