1 /*
2 * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
4 * All Rights reserved.
5 * Portions of this software copied from libfabric
6 * (https://github.com/ofiwg/libfabric)
7 *
8 * LICENSE_BEGIN
9 *
10 * BSD license:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
29 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
35 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 *
38 * LICENSE_END
39 *
40 *
41 */
42
43 #include "opal_config.h"
44
45 #include <errno.h>
46 #include <arpa/inet.h>
47 #include <time.h>
48 #ifdef HAVE_NETINET_IN_H
49 #include <netinet/in.h>
50 #endif
51
52 #include "libnl_utils.h"
53
54 /* Adapt this copied code for Open MPI */
55 #include "opal/util/output.h"
56
57
58 static struct nla_policy route_policy[RTA_MAX+1] = {
59 [RTA_IIF] = { .type = NLA_STRING,
60 .maxlen = IFNAMSIZ, },
61 [RTA_OIF] = { .type = NLA_U32 },
62 [RTA_PRIORITY] = { .type = NLA_U32 },
63 [RTA_FLOW] = { .type = NLA_U32 },
64 [RTA_MP_ALGO] = { .type = NLA_U32 },
65 [RTA_CACHEINFO] = { .minlen = sizeof(struct rta_cacheinfo) },
66 [RTA_METRICS] = { .type = NLA_NESTED },
67 [RTA_MULTIPATH] = { .type = NLA_NESTED },
68 };
69
/*
 * Decide whether a received netlink message is the reply to the query
 * most recently sent on this socket.
 *
 * When OPAL_ENABLE_DEBUG is set, the message's port id must equal the
 * socket's local port and its sequence number must equal unlsk->seq;
 * otherwise the mismatch is logged and the message is rejected.  When
 * OPAL_ENABLE_DEBUG is not set, no check is performed at all.
 *
 * Returns 1 if the message is accepted, 0 if it is rejected.
 */
static int opal_reachable_netlink_is_nlreply_expected(struct opal_reachable_netlink_sk *unlsk,
                                                      struct nlmsghdr *nlm_hdr)
{
#if OPAL_ENABLE_DEBUG
    if (nlm_hdr->nlmsg_pid == nl_socket_get_local_port(unlsk->nlh)
        && nlm_hdr->nlmsg_seq == unlsk->seq) {
        return 1;
    }

    opal_output(0, "Not an expected reply msg pid: %u local pid: %u msg seq: %u expected seq: %u\n",
                nlm_hdr->nlmsg_pid,
                nl_socket_get_local_port(unlsk->nlh),
                nlm_hdr->nlmsg_seq, unlsk->seq);
    return 0;
#else
    return 1;
#endif
}
86
opal_reachable_netlink_is_nlreply_err(struct nlmsghdr * nlm_hdr)87 static int opal_reachable_netlink_is_nlreply_err(struct nlmsghdr *nlm_hdr)
88 {
89 if (nlm_hdr->nlmsg_type == NLMSG_ERROR) {
90 struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr);
91 if (nlm_hdr->nlmsg_len >= (__u32)NLMSG_SIZE(sizeof(*e)))
92 opal_output_verbose(20, 0,
93 "Received a netlink error message");
94 else
95 opal_output_verbose(20, 0,
96 "Received a truncated netlink error message\n");
97 return 1;
98 }
99
100 return 0;
101 }
102
opal_reachable_netlink_send_query(struct opal_reachable_netlink_sk * unlsk,struct nl_msg * msg,int protocol,int flag)103 static int opal_reachable_netlink_send_query(struct opal_reachable_netlink_sk *unlsk,
104 struct nl_msg *msg,
105 int protocol, int flag)
106 {
107 struct nlmsghdr *nlhdr;
108
109 nlhdr = nlmsg_hdr(msg);
110 nlhdr->nlmsg_pid = nl_socket_get_local_port(unlsk->nlh);
111 nlhdr->nlmsg_seq = ++unlsk->seq;
112 nlmsg_set_proto(msg, protocol);
113 nlhdr->nlmsg_flags = flag;
114
115 return nl_send(unlsk->nlh, msg);
116 }
117
/*
 * Install a one-second receive timeout (SO_RCVTIMEO) on the file
 * descriptor behind a netlink handle.
 *
 * Returns the result of setsockopt(); a failure is logged only when
 * OPAL_ENABLE_DEBUG is set.
 */
static int opal_reachable_netlink_set_rcvsk_timer(NL_HANDLE *nlh)
{
    struct timeval one_second = { .tv_sec = 1, .tv_usec = 0 };
    int rc;

    rc = setsockopt(nl_socket_get_fd(nlh), SOL_SOCKET, SO_RCVTIMEO,
                    (char *)&one_second, sizeof(one_second));
#if OPAL_ENABLE_DEBUG
    if (rc < 0) {
        opal_output(0, "Failed to set SO_RCVTIMEO for nl socket");
    }
#endif

    return rc;
}
135
opal_reachable_netlink_sk_alloc(struct opal_reachable_netlink_sk ** p_sk,int protocol)136 static int opal_reachable_netlink_sk_alloc(struct opal_reachable_netlink_sk **p_sk, int protocol)
137 {
138 struct opal_reachable_netlink_sk *unlsk;
139 NL_HANDLE *nlh;
140 int err;
141
142 unlsk = calloc(1, sizeof(*unlsk));
143 if (!unlsk) {
144 opal_output(0, "Failed to allocate opal_reachable_netlink_sk struct\n");
145 return ENOMEM;
146 }
147
148 nlh = NL_HANDLE_ALLOC();
149 if (!nlh) {
150 opal_output(0, "Failed to allocate nl handle\n");
151 err = ENOMEM;
152 goto err_free_unlsk;
153 }
154
155 err = nl_connect(nlh, protocol);
156 if (err < 0) {
157 opal_output(0, "Failed to connnect netlink route socket error: %s\n",
158 NL_GETERROR(err));
159 err = EINVAL;
160 goto err_free_nlh;
161 }
162
163 NL_DISABLE_SEQ_CHECK(nlh);
164 err = opal_reachable_netlink_set_rcvsk_timer(nlh);
165 if (err < 0)
166 goto err_close_nlh;
167
168 unlsk->nlh = nlh;
169 unlsk->seq = time(NULL);
170 *p_sk = unlsk;
171 return 0;
172
173 err_close_nlh:
174 nl_close(nlh);
175 err_free_nlh:
176 NL_HANDLE_FREE(nlh);
177 err_free_unlsk:
178 free(unlsk);
179 return err;
180 }
181
opal_reachable_netlink_sk_free(struct opal_reachable_netlink_sk * unlsk)182 static void opal_reachable_netlink_sk_free(struct opal_reachable_netlink_sk *unlsk)
183 {
184 nl_close(unlsk->nlh);
185 NL_HANDLE_FREE(unlsk->nlh);
186 free(unlsk);
187 }
188
opal_reachable_netlink_rt_raw_parse_cb(struct nl_msg * msg,void * arg)189 static int opal_reachable_netlink_rt_raw_parse_cb(struct nl_msg *msg, void *arg)
190 {
191 struct opal_reachable_netlink_rt_cb_arg *lookup_arg = (struct opal_reachable_netlink_rt_cb_arg *)arg;
192 struct opal_reachable_netlink_sk *unlsk = lookup_arg->unlsk;
193 struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg);
194 struct rtmsg *rtm;
195 struct nlattr *tb[RTA_MAX + 1];
196 int found = 0;
197 int err;
198
199 INC_CB_MSGCNT(lookup_arg);
200
201 if (!opal_reachable_netlink_is_nlreply_expected(unlsk, nlm_hdr)) {
202 #if OPAL_ENABLE_DEBUG
203 nl_msg_dump(msg, stderr);
204 #endif
205 return NL_SKIP;
206 }
207
208 if (opal_reachable_netlink_is_nlreply_err(nlm_hdr)) {
209 #if OPAL_ENABLE_DEBUG
210 nl_msg_dump(msg, stderr);
211 #endif
212 return NL_SKIP;
213 }
214
215 if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) {
216 #if OPAL_ENABLE_DEBUG
217 char buf[128];
218 nl_nlmsgtype2str(nlm_hdr->nlmsg_type, buf, sizeof(buf));
219 opal_output(0, "Received an invalid route request reply message type: %s\n",
220 buf);
221 nl_msg_dump(msg, stderr);
222 #endif
223 return NL_SKIP;
224 }
225
226 rtm = nlmsg_data(nlm_hdr);
227 if (rtm->rtm_family != AF_INET
228 #if OPAL_ENABLE_IPV6
229 && rtm->rtm_family != AF_INET6
230 #endif
231 ) {
232 #if OPAL_ENABLE_DEBUG
233 opal_output(0, "RTM message contains invalid AF family: %u\n",
234 rtm->rtm_family);
235 nl_msg_dump(msg, stderr);
236 #endif
237 return NL_SKIP;
238 }
239
240 err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX,
241 route_policy);
242 if (err < 0) {
243 #if OPAL_ENABLE_DEBUG
244 opal_output(0, "nlmsg parse error %s\n", NL_GETERROR(err));
245 nl_msg_dump(msg, stderr);
246 #endif
247 return NL_SKIP;
248 }
249
250 if (tb[RTA_OIF]) {
251 if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif)
252 found = 1;
253 else
254 /* usually, this means that there is a route to the remote
255 host, but that it's not through the given interface. For
256 our purposes, that means it's not reachable. */
257 opal_output_verbose(20, 0, "Retrieved route has a different outgoing interface %d (expected %d)\n",
258 nla_get_u32(tb[RTA_OIF]),
259 lookup_arg->oif);
260 }
261
262 if (found && tb[RTA_GATEWAY]) {
263 lookup_arg->has_gateway = 1;
264 }
265 lookup_arg->found = found;
266 return NL_STOP;
267 }
268
opal_reachable_netlink_rt_lookup(uint32_t src_addr,uint32_t dst_addr,int outgoing_interface,int * has_gateway)269 int opal_reachable_netlink_rt_lookup(uint32_t src_addr,
270 uint32_t dst_addr,
271 int outgoing_interface,
272 int *has_gateway)
273 {
274 struct opal_reachable_netlink_sk *unlsk; /* netlink socket */
275 struct nl_msg *nlm; /* netlink message */
276 struct rtmsg rmsg; /* route message */
277 struct opal_reachable_netlink_rt_cb_arg arg; /* callback argument */
278 int err;
279
280 /* allocate netlink socket */
281 unlsk = NULL;
282 err = opal_reachable_netlink_sk_alloc(&unlsk, NETLINK_ROUTE);
283 if (err)
284 return err;
285
286 /* allocate route message */
287 memset(&rmsg, 0, sizeof(rmsg));
288 rmsg.rtm_family = AF_INET;
289 rmsg.rtm_dst_len = sizeof(dst_addr) * CHAR_BIT;
290 rmsg.rtm_src_len = sizeof(src_addr) * CHAR_BIT;
291
292 /* allocate netlink message of type RTM_GETROUTE */
293 nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
294 if (!nlm) {
295 opal_output(0, "Failed to alloc nl message, %s\n",
296 NL_GETERROR(err));
297 err = ENOMEM;
298 goto out;
299 }
300
301 /* append route message and addresses to netlink message. */
302 nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO);
303 nla_put_u32(nlm, RTA_DST, dst_addr);
304 nla_put_u32(nlm, RTA_SRC, src_addr);
305
306 /* query kernel */
307 err = opal_reachable_netlink_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST);
308 nlmsg_free(nlm);
309 if (err < 0) {
310 opal_output(0, "Failed to send RTM_GETROUTE query message, error %s\n",
311 NL_GETERROR(err));
312 err = EINVAL;
313 goto out;
314 }
315
316 /* Setup callback function */
317 memset(&arg, 0, sizeof(arg));
318 arg.oif = outgoing_interface;
319 arg.unlsk = unlsk;
320 err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM,
321 opal_reachable_netlink_rt_raw_parse_cb, &arg);
322 if (err != 0) {
323 opal_output(0, "Failed to setup callback function, error %s\n",
324 NL_GETERROR(err));
325 err = EINVAL;
326 goto out;
327 }
328
329 /* recieve results */
330 NL_RECVMSGS(unlsk->nlh, arg, EHOSTUNREACH, err, out);
331
332 /* check whether a route was found */
333 if (arg.found) {
334 *has_gateway = arg.has_gateway;
335 err = 0;
336 } else {
337 *has_gateway = 0;
338 err = EHOSTUNREACH;
339 }
340
341 out:
342 opal_reachable_netlink_sk_free(unlsk);
343 return err;
344 }
345
346
347 #if OPAL_ENABLE_IPV6
opal_reachable_netlink_rt_lookup6(struct in6_addr * src_addr,struct in6_addr * dst_addr,int outgoing_interface,int * has_gateway)348 int opal_reachable_netlink_rt_lookup6(struct in6_addr *src_addr,
349 struct in6_addr *dst_addr,
350 int outgoing_interface,
351 int *has_gateway)
352 {
353
354 struct opal_reachable_netlink_sk *unlsk; /* netlink socket */
355 struct nl_msg *nlm; /* netlink message */
356 struct rtmsg rmsg; /* route message */
357 struct opal_reachable_netlink_rt_cb_arg arg; /* callback argument */
358 int err;
359
360 /* allocate netlink socket */
361 unlsk = NULL;
362 err = opal_reachable_netlink_sk_alloc(&unlsk, NETLINK_ROUTE);
363 if (err)
364 return err;
365
366 /* allocate route message */
367 memset(&rmsg, 0, sizeof(rmsg));
368 rmsg.rtm_family = AF_INET6;
369 rmsg.rtm_dst_len = sizeof(*dst_addr) * CHAR_BIT;
370 rmsg.rtm_src_len = sizeof(*src_addr) * CHAR_BIT;
371
372 /* allocate netlink message of type RTM_GETROUTE */
373 nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
374 if (!nlm) {
375 opal_output(0, "Failed to alloc nl message, %s\n",
376 NL_GETERROR(err));
377 err = ENOMEM;
378 goto out;
379 }
380
381 /* append route message and addresses to netlink message. */
382 nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO);
383 nla_put(nlm, RTA_DST, sizeof(dst_addr->s6_addr), &(dst_addr->s6_addr));
384 nla_put(nlm, RTA_SRC, sizeof(src_addr->s6_addr), &(src_addr->s6_addr));
385
386 /* query kernel */
387 err = opal_reachable_netlink_send_query(unlsk, nlm, NETLINK_ROUTE, NLM_F_REQUEST);
388 nlmsg_free(nlm);
389 if (err < 0) {
390 opal_output(0, "Failed to send RTM_GETROUTE query message, error %s\n",
391 NL_GETERROR(err));
392 err = EINVAL;
393 goto out;
394 }
395
396 /* Setup callback function */
397 memset(&arg, 0, sizeof(arg));
398 arg.oif = outgoing_interface;
399 arg.unlsk = unlsk;
400 err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM,
401 opal_reachable_netlink_rt_raw_parse_cb, &arg);
402 if (err != 0) {
403 opal_output(0, "Failed to setup callback function, error %s\n",
404 NL_GETERROR(err));
405 err = EINVAL;
406 goto out;
407 }
408
409 /* receive results */
410 NL_RECVMSGS(unlsk->nlh, arg, EHOSTUNREACH, err, out);
411
412 /* check whether a route was found */
413 if (arg.found) {
414 *has_gateway = arg.has_gateway;
415 err = 0;
416 } else {
417 *has_gateway = 0;
418 err = EHOSTUNREACH;
419 }
420
421 out:
422 opal_reachable_netlink_sk_free(unlsk);
423 return err;
424 }
425 #endif /* #if OPAL_ENABLE_IPV6 */
426