1 /* $OpenBSD: rtsock.c,v 1.373 2023/12/03 10:51:17 mvs Exp $ */
2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1988, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80
81 #include <netinet/in.h>
82
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97
98 #define ROUTESNDQ 8192
99 #define ROUTERCVQ 8192
100
101 const struct sockaddr route_src = { 2, PF_ROUTE, };
102
103 struct walkarg {
104 int w_op, w_arg, w_tmemsize;
105 size_t w_given, w_needed;
106 caddr_t w_where, w_tmem;
107 };
108
109 void route_prinit(void);
110 void rcb_ref(void *, void *);
111 void rcb_unref(void *, void *);
112 int route_output(struct mbuf *, struct socket *);
113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *);
114 int route_attach(struct socket *, int, int);
115 int route_detach(struct socket *);
116 int route_disconnect(struct socket *);
117 int route_shutdown(struct socket *);
118 void route_rcvd(struct socket *);
119 int route_send(struct socket *, struct mbuf *, struct mbuf *,
120 struct mbuf *);
121 int route_sockaddr(struct socket *, struct mbuf *);
122 int route_peeraddr(struct socket *, struct mbuf *);
123 void route_input(struct mbuf *m0, struct socket *, sa_family_t);
124 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
125 int route_cleargateway(struct rtentry *, void *, unsigned int);
126 void rtm_senddesync_timer(void *);
127 void rtm_senddesync(struct socket *);
128 int rtm_sendup(struct socket *, struct mbuf *);
129
130 int rtm_getifa(struct rt_addrinfo *, unsigned int);
131 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
132 uint8_t, unsigned int);
133 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
134 struct mbuf *rtm_msg1(int, struct rt_addrinfo *);
135 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
136 struct walkarg *);
137 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
138 int rtm_validate_proposal(struct rt_addrinfo *);
139 void rtm_setmetrics(u_long, const struct rt_metrics *,
140 struct rt_kmetrics *);
141 void rtm_getmetrics(const struct rtentry *,
142 struct rt_metrics *);
143
144 int sysctl_iflist(int, struct walkarg *);
145 int sysctl_ifnames(struct walkarg *);
146 int sysctl_rtable_rtstat(void *, size_t *, void *);
147
148 int rt_setsource(unsigned int, const struct sockaddr *);
149
150 /*
151 * Locks used to protect struct members
152 * I immutable after creation
153 * s solock
154 */
155 struct rtpcb {
156 struct socket *rop_socket; /* [I] */
157
158 SRPL_ENTRY(rtpcb) rop_list;
159 struct refcnt rop_refcnt;
160 struct timeout rop_timeout;
161 unsigned int rop_msgfilter; /* [s] */
162 unsigned int rop_flagfilter; /* [s] */
163 unsigned int rop_flags; /* [s] */
164 u_int rop_rtableid; /* [s] */
165 unsigned short rop_proto; /* [I] */
166 u_char rop_priority; /* [s] */
167 };
168 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb)
169
170 struct rtptable {
171 SRPL_HEAD(, rtpcb) rtp_list;
172 struct srpl_rc rtp_rc;
173 struct rwlock rtp_lk;
174 unsigned int rtp_count;
175 };
176
177 struct pool rtpcb_pool;
178 struct rtptable rtptable;
179
/*
 * Overflow state kept in rop_flags.  When a routing socket overflows and
 * messages have been lost, userland is told so with an RTM_DESYNC
 * message; the per-pcb timeout retries that notification until it can
 * be queued.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

/* Delay, in milliseconds, before RTM_DESYNC delivery is retried. */
#define ROUTE_DESYNC_RESEND_TIMEOUT	200
190
191 void
route_prinit(void)192 route_prinit(void)
193 {
194 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
195 rw_init(&rtptable.rtp_lk, "rtsock");
196 SRPL_INIT(&rtptable.rtp_list);
197 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
198 IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
199 }
200
201 void
rcb_ref(void * null,void * v)202 rcb_ref(void *null, void *v)
203 {
204 struct rtpcb *rop = v;
205
206 refcnt_take(&rop->rop_refcnt);
207 }
208
209 void
rcb_unref(void * null,void * v)210 rcb_unref(void *null, void *v)
211 {
212 struct rtpcb *rop = v;
213
214 refcnt_rele_wake(&rop->rop_refcnt);
215 }
216
217 int
route_attach(struct socket * so,int proto,int wait)218 route_attach(struct socket *so, int proto, int wait)
219 {
220 struct rtpcb *rop;
221 int error;
222
223 error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
224 if (error)
225 return (error);
226 /*
227 * use the rawcb but allocate a rtpcb, this
228 * code does not care about the additional fields
229 * and works directly on the raw socket.
230 */
231 rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
232 PR_ZERO);
233 if (rop == NULL)
234 return (ENOBUFS);
235 so->so_pcb = rop;
236 /* Init the timeout structure */
237 timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so,
238 KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
239 refcnt_init(&rop->rop_refcnt);
240
241 rop->rop_socket = so;
242 rop->rop_proto = proto;
243
244 rop->rop_rtableid = curproc->p_p->ps_rtableid;
245
246 soisconnected(so);
247 so->so_options |= SO_USELOOPBACK;
248
249 rw_enter(&rtptable.rtp_lk, RW_WRITE);
250 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
251 rop_list);
252 rtptable.rtp_count++;
253 rw_exit(&rtptable.rtp_lk);
254
255 return (0);
256 }
257
258 int
route_detach(struct socket * so)259 route_detach(struct socket *so)
260 {
261 struct rtpcb *rop;
262
263 soassertlocked(so);
264
265 rop = sotortpcb(so);
266 if (rop == NULL)
267 return (EINVAL);
268
269 rw_enter(&rtptable.rtp_lk, RW_WRITE);
270
271 rtptable.rtp_count--;
272 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
273 rop_list);
274 rw_exit(&rtptable.rtp_lk);
275
276 sounlock(so);
277
278 /* wait for all references to drop */
279 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
280 timeout_del_barrier(&rop->rop_timeout);
281
282 solock(so);
283
284 so->so_pcb = NULL;
285 KASSERT((so->so_state & SS_NOFDREF) == 0);
286 pool_put(&rtpcb_pool, rop);
287
288 return (0);
289 }
290
/*
 * Disconnect hook: mark the socket as disconnected.  Always returns 0.
 */
int
route_disconnect(struct socket *so)
{
	soisdisconnected(so);

	return (0);
}
297
/*
 * Shutdown hook: no more data can be sent on the socket.
 * Always returns 0.
 */
int
route_shutdown(struct socket *so)
{
	socantsendmore(so);

	return (0);
}
304
305 void
route_rcvd(struct socket * so)306 route_rcvd(struct socket *so)
307 {
308 struct rtpcb *rop = sotortpcb(so);
309
310 soassertlocked(so);
311
312 /*
313 * If we are in a FLUSH state, check if the buffer is
314 * empty so that we can clear the flag.
315 */
316 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
317 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
318 rop->rop_socket->so_rcv.sb_hiwat)))
319 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
320 }
321
322 int
route_send(struct socket * so,struct mbuf * m,struct mbuf * nam,struct mbuf * control)323 route_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
324 struct mbuf *control)
325 {
326 int error;
327
328 soassertlocked(so);
329
330 if (control && control->m_len) {
331 error = EOPNOTSUPP;
332 goto out;
333 }
334
335 if (nam) {
336 error = EISCONN;
337 goto out;
338 }
339
340 error = route_output(m, so);
341 m = NULL;
342
343 out:
344 m_freem(control);
345 m_freem(m);
346
347 return (error);
348 }
349
350 int
route_sockaddr(struct socket * so,struct mbuf * nam)351 route_sockaddr(struct socket *so, struct mbuf *nam)
352 {
353 return (EINVAL);
354 }
355
356 int
route_peeraddr(struct socket * so,struct mbuf * nam)357 route_peeraddr(struct socket *so, struct mbuf *nam)
358 {
359 /* minimal support, just implement a fake peer address */
360 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
361 nam->m_len = route_src.sa_len;
362 return (0);
363 }
364
365 int
route_ctloutput(int op,struct socket * so,int level,int optname,struct mbuf * m)366 route_ctloutput(int op, struct socket *so, int level, int optname,
367 struct mbuf *m)
368 {
369 struct rtpcb *rop = sotortpcb(so);
370 int error = 0;
371 unsigned int tid, prio;
372
373 if (level != AF_ROUTE)
374 return (EINVAL);
375
376 switch (op) {
377 case PRCO_SETOPT:
378 switch (optname) {
379 case ROUTE_MSGFILTER:
380 if (m == NULL || m->m_len != sizeof(unsigned int))
381 error = EINVAL;
382 else
383 rop->rop_msgfilter = *mtod(m, unsigned int *);
384 break;
385 case ROUTE_TABLEFILTER:
386 if (m == NULL || m->m_len != sizeof(unsigned int)) {
387 error = EINVAL;
388 break;
389 }
390 tid = *mtod(m, unsigned int *);
391 if (tid != RTABLE_ANY && !rtable_exists(tid))
392 error = ENOENT;
393 else
394 rop->rop_rtableid = tid;
395 break;
396 case ROUTE_PRIOFILTER:
397 if (m == NULL || m->m_len != sizeof(unsigned int)) {
398 error = EINVAL;
399 break;
400 }
401 prio = *mtod(m, unsigned int *);
402 if (prio > RTP_MAX)
403 error = EINVAL;
404 else
405 rop->rop_priority = prio;
406 break;
407 case ROUTE_FLAGFILTER:
408 if (m == NULL || m->m_len != sizeof(unsigned int))
409 error = EINVAL;
410 else
411 rop->rop_flagfilter = *mtod(m, unsigned int *);
412 break;
413 default:
414 error = ENOPROTOOPT;
415 break;
416 }
417 break;
418 case PRCO_GETOPT:
419 switch (optname) {
420 case ROUTE_MSGFILTER:
421 m->m_len = sizeof(unsigned int);
422 *mtod(m, unsigned int *) = rop->rop_msgfilter;
423 break;
424 case ROUTE_TABLEFILTER:
425 m->m_len = sizeof(unsigned int);
426 *mtod(m, unsigned int *) = rop->rop_rtableid;
427 break;
428 case ROUTE_PRIOFILTER:
429 m->m_len = sizeof(unsigned int);
430 *mtod(m, unsigned int *) = rop->rop_priority;
431 break;
432 case ROUTE_FLAGFILTER:
433 m->m_len = sizeof(unsigned int);
434 *mtod(m, unsigned int *) = rop->rop_flagfilter;
435 break;
436 default:
437 error = ENOPROTOOPT;
438 break;
439 }
440 }
441 return (error);
442 }
443
/*
 * Timeout handler armed by rtm_senddesync(): retry delivery of the
 * RTM_DESYNC message with the socket locked.  `xso' is the socket that
 * was registered with the timeout in route_attach().
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket *so = (struct socket *)xso;

	solock(so);
	rtm_senddesync(so);
	sounlock(so);
}
453
454 void
rtm_senddesync(struct socket * so)455 rtm_senddesync(struct socket *so)
456 {
457 struct rtpcb *rop = sotortpcb(so);
458 struct mbuf *desync_mbuf;
459
460 soassertlocked(so);
461
462 /*
463 * Dying socket is disconnected by upper layer and there is
464 * no reason to send packet. Also we shouldn't reschedule
465 * timeout(9), otherwise timeout_del_barrier(9) can't help us.
466 */
467 if ((so->so_state & SS_ISCONNECTED) == 0 ||
468 (so->so_rcv.sb_state & SS_CANTRCVMORE))
469 return;
470
471 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
472 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
473 return;
474
475 /*
476 * If we fail to alloc memory or if sbappendaddr()
477 * fails, re-add timeout and try again.
478 */
479 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
480 if (desync_mbuf != NULL) {
481 if (sbappendaddr(so, &so->so_rcv, &route_src,
482 desync_mbuf, NULL) != 0) {
483 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
484 sorwakeup(rop->rop_socket);
485 return;
486 }
487 m_freem(desync_mbuf);
488 }
489 /* Re-add timeout to try sending msg again */
490 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
491 }
492
493 void
route_input(struct mbuf * m0,struct socket * so0,sa_family_t sa_family)494 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
495 {
496 struct socket *so;
497 struct rtpcb *rop;
498 struct rt_msghdr *rtm;
499 struct mbuf *m = m0;
500 struct srp_ref sr;
501
502 /* ensure that we can access the rtm_type via mtod() */
503 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
504 m_freem(m);
505 return;
506 }
507
508 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
509 /*
510 * If route socket is bound to an address family only send
511 * messages that match the address family. Address family
512 * agnostic messages are always sent.
513 */
514 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
515 rop->rop_proto != sa_family)
516 continue;
517
518
519 so = rop->rop_socket;
520 solock(so);
521
522 /*
523 * Check to see if we don't want our own messages and
524 * if we can receive anything.
525 */
526 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
527 !(so->so_state & SS_ISCONNECTED) ||
528 (so->so_rcv.sb_state & SS_CANTRCVMORE))
529 goto next;
530
531 /* filter messages that the process does not want */
532 rtm = mtod(m, struct rt_msghdr *);
533 /* but RTM_DESYNC can't be filtered */
534 if (rtm->rtm_type != RTM_DESYNC) {
535 if (rop->rop_msgfilter != 0 &&
536 !(rop->rop_msgfilter & (1U << rtm->rtm_type)))
537 goto next;
538 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
539 goto next;
540 }
541 switch (rtm->rtm_type) {
542 case RTM_IFANNOUNCE:
543 case RTM_DESYNC:
544 /* no tableid */
545 break;
546 case RTM_RESOLVE:
547 case RTM_NEWADDR:
548 case RTM_DELADDR:
549 case RTM_IFINFO:
550 case RTM_80211INFO:
551 case RTM_BFD:
552 /* check against rdomain id */
553 if (rop->rop_rtableid != RTABLE_ANY &&
554 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
555 goto next;
556 break;
557 default:
558 if (rop->rop_priority != 0 &&
559 rop->rop_priority < rtm->rtm_priority)
560 goto next;
561 /* check against rtable id */
562 if (rop->rop_rtableid != RTABLE_ANY &&
563 rop->rop_rtableid != rtm->rtm_tableid)
564 goto next;
565 break;
566 }
567
568 /*
569 * Check to see if the flush flag is set. If so, don't queue
570 * any more messages until the flag is cleared.
571 */
572 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
573 goto next;
574
575 rtm_sendup(so, m);
576 next:
577 sounlock(so);
578 }
579 SRPL_LEAVE(&sr);
580
581 m_freem(m);
582 }
583
584 int
rtm_sendup(struct socket * so,struct mbuf * m0)585 rtm_sendup(struct socket *so, struct mbuf *m0)
586 {
587 struct rtpcb *rop = sotortpcb(so);
588 struct mbuf *m;
589
590 soassertlocked(so);
591
592 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
593 if (m == NULL)
594 return (ENOMEM);
595
596 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
597 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
598 /* Flag socket as desync'ed and flush required */
599 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
600 rtm_senddesync(so);
601 m_freem(m);
602 return (ENOBUFS);
603 }
604
605 sorwakeup(so);
606 return (0);
607 }
608
609 struct rt_msghdr *
rtm_report(struct rtentry * rt,u_char type,int seq,int tableid)610 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
611 {
612 struct rt_msghdr *rtm;
613 struct rt_addrinfo info;
614 struct sockaddr_rtlabel sa_rl;
615 struct sockaddr_in6 sa_mask;
616 #ifdef BFD
617 struct sockaddr_bfd sa_bfd;
618 #endif
619 struct ifnet *ifp = NULL;
620 int len;
621
622 bzero(&info, sizeof(info));
623 info.rti_info[RTAX_DST] = rt_key(rt);
624 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
625 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
626 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
627 #ifdef BFD
628 if (rt->rt_flags & RTF_BFD) {
629 KERNEL_LOCK();
630 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
631 KERNEL_UNLOCK();
632 }
633 #endif
634 #ifdef MPLS
635 if (rt->rt_flags & RTF_MPLS) {
636 struct sockaddr_mpls sa_mpls;
637
638 bzero(&sa_mpls, sizeof(sa_mpls));
639 sa_mpls.smpls_family = AF_MPLS;
640 sa_mpls.smpls_len = sizeof(sa_mpls);
641 sa_mpls.smpls_label = ((struct rt_mpls *)
642 rt->rt_llinfo)->mpls_label;
643 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
644 info.rti_mpls = ((struct rt_mpls *)
645 rt->rt_llinfo)->mpls_operation;
646 }
647 #endif
648 ifp = if_get(rt->rt_ifidx);
649 if (ifp != NULL) {
650 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
651 info.rti_info[RTAX_IFA] = rtable_getsource(tableid,
652 info.rti_info[RTAX_DST]->sa_family);
653 if (info.rti_info[RTAX_IFA] == NULL)
654 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
655 if (ifp->if_flags & IFF_POINTOPOINT)
656 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
657 }
658 if_put(ifp);
659 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
660
661 /* build new route message */
662 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
663 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
664
665 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
666 rtm->rtm_type = type;
667 rtm->rtm_index = rt->rt_ifidx;
668 rtm->rtm_tableid = tableid;
669 rtm->rtm_priority = rt->rt_priority & RTP_MASK;
670 rtm->rtm_flags = rt->rt_flags;
671 rtm->rtm_pid = curproc->p_p->ps_pid;
672 rtm->rtm_seq = seq;
673 rtm_getmetrics(rt, &rtm->rtm_rmx);
674 rtm->rtm_addrs = info.rti_addrs;
675 #ifdef MPLS
676 rtm->rtm_mpls = info.rti_mpls;
677 #endif
678 return rtm;
679 }
680
681 int
route_output(struct mbuf * m,struct socket * so)682 route_output(struct mbuf *m, struct socket *so)
683 {
684 struct rt_msghdr *rtm = NULL;
685 struct rtentry *rt = NULL;
686 struct rt_addrinfo info;
687 struct ifnet *ifp;
688 int len, seq, useloopback, error = 0;
689 u_int tableid;
690 u_int8_t prio;
691 u_char vers, type;
692
693 if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
694 (m = m_pullup(m, sizeof(int32_t))) == NULL))
695 return (ENOBUFS);
696 if ((m->m_flags & M_PKTHDR) == 0)
697 panic("route_output");
698
699 useloopback = so->so_options & SO_USELOOPBACK;
700
701 /*
702 * The socket can't be closed concurrently because the file
703 * descriptor reference is still held.
704 */
705
706 sounlock(so);
707
708 len = m->m_pkthdr.len;
709 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) +
710 sizeof(rtm->rtm_hdrlen) ||
711 len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
712 error = EINVAL;
713 goto fail;
714 }
715 vers = mtod(m, struct rt_msghdr *)->rtm_version;
716 switch (vers) {
717 case RTM_VERSION:
718 if (len < sizeof(struct rt_msghdr)) {
719 error = EINVAL;
720 goto fail;
721 }
722 if (len > RTM_MAXSIZE) {
723 error = EMSGSIZE;
724 goto fail;
725 }
726 rtm = malloc(len, M_RTABLE, M_WAITOK);
727 m_copydata(m, 0, len, rtm);
728 break;
729 default:
730 error = EPROTONOSUPPORT;
731 goto fail;
732 }
733
734 /* Verify that the caller is sending an appropriate message early */
735 switch (rtm->rtm_type) {
736 case RTM_ADD:
737 case RTM_DELETE:
738 case RTM_GET:
739 case RTM_CHANGE:
740 case RTM_PROPOSAL:
741 case RTM_SOURCE:
742 break;
743 default:
744 error = EOPNOTSUPP;
745 goto fail;
746 }
747 /*
748 * Verify that the header length is valid.
749 * All messages from userland start with a struct rt_msghdr.
750 */
751 if (rtm->rtm_hdrlen == 0) /* old client */
752 rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
753 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
754 len < rtm->rtm_hdrlen) {
755 error = EINVAL;
756 goto fail;
757 }
758
759 rtm->rtm_pid = curproc->p_p->ps_pid;
760
761 /*
762 * Verify that the caller has the appropriate privilege; RTM_GET
763 * is the only operation the non-superuser is allowed.
764 */
765 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
766 error = EACCES;
767 goto fail;
768 }
769 tableid = rtm->rtm_tableid;
770 if (!rtable_exists(tableid)) {
771 if (rtm->rtm_type == RTM_ADD) {
772 if ((error = rtable_add(tableid)) != 0)
773 goto fail;
774 } else {
775 error = EINVAL;
776 goto fail;
777 }
778 }
779
780 /* Do not let userland play with kernel-only flags. */
781 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
782 error = EINVAL;
783 goto fail;
784 }
785
786 /* make sure that kernel-only bits are not set */
787 rtm->rtm_priority &= RTP_MASK;
788 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
789 rtm->rtm_fmask &= RTF_FMASK;
790
791 if (rtm->rtm_priority != 0) {
792 if (rtm->rtm_priority > RTP_MAX ||
793 rtm->rtm_priority == RTP_LOCAL) {
794 error = EINVAL;
795 goto fail;
796 }
797 prio = rtm->rtm_priority;
798 } else if (rtm->rtm_type != RTM_ADD)
799 prio = RTP_ANY;
800 else if (rtm->rtm_flags & RTF_STATIC)
801 prio = 0;
802 else
803 prio = RTP_DEFAULT;
804
805 bzero(&info, sizeof(info));
806 info.rti_addrs = rtm->rtm_addrs;
807 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
808 len + (caddr_t)rtm, &info)) != 0)
809 goto fail;
810
811 info.rti_flags = rtm->rtm_flags;
812
813 if (rtm->rtm_type != RTM_SOURCE &&
814 rtm->rtm_type != RTM_PROPOSAL &&
815 (info.rti_info[RTAX_DST] == NULL ||
816 info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
817 (info.rti_info[RTAX_GATEWAY] != NULL &&
818 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
819 info.rti_info[RTAX_GENMASK] != NULL)) {
820 error = EINVAL;
821 goto fail;
822 }
823 #ifdef MPLS
824 info.rti_mpls = rtm->rtm_mpls;
825 #endif
826
827 if (info.rti_info[RTAX_GATEWAY] != NULL &&
828 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
829 (info.rti_flags & RTF_CLONING) == 0) {
830 info.rti_flags |= RTF_LLINFO;
831 }
832
833 /*
834 * Validate RTM_PROPOSAL and pass it along or error out.
835 */
836 if (rtm->rtm_type == RTM_PROPOSAL) {
837 if (rtm_validate_proposal(&info) == -1) {
838 error = EINVAL;
839 goto fail;
840 }
841 /*
842 * If this is a solicitation proposal forward request to
843 * all interfaces. Most handlers will ignore it but at least
844 * umb(4) will send a response to this event.
845 */
846 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
847 NET_LOCK();
848 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
849 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
850 }
851 NET_UNLOCK();
852 }
853 } else if (rtm->rtm_type == RTM_SOURCE) {
854 if (info.rti_info[RTAX_IFA] == NULL) {
855 error = EINVAL;
856 goto fail;
857 }
858 NET_LOCK();
859 error = rt_setsource(tableid, info.rti_info[RTAX_IFA]);
860 NET_UNLOCK();
861 if (error)
862 goto fail;
863 } else {
864 error = rtm_output(rtm, &rt, &info, prio, tableid);
865 if (!error) {
866 type = rtm->rtm_type;
867 seq = rtm->rtm_seq;
868 free(rtm, M_RTABLE, len);
869 NET_LOCK_SHARED();
870 rtm = rtm_report(rt, type, seq, tableid);
871 NET_UNLOCK_SHARED();
872 len = rtm->rtm_msglen;
873 }
874 }
875
876 rtfree(rt);
877 if (error) {
878 rtm->rtm_errno = error;
879 } else {
880 rtm->rtm_flags |= RTF_DONE;
881 }
882
883 /*
884 * Check to see if we don't want our own messages.
885 */
886 if (!useloopback) {
887 if (rtptable.rtp_count == 0) {
888 /* no other listener and no loopback of messages */
889 goto fail;
890 }
891 }
892 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
893 m_freem(m);
894 m = NULL;
895 } else if (m->m_pkthdr.len > len)
896 m_adj(m, len - m->m_pkthdr.len);
897 free(rtm, M_RTABLE, len);
898 if (m)
899 route_input(m, so, info.rti_info[RTAX_DST] ?
900 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
901 solock(so);
902
903 return (error);
904 fail:
905 free(rtm, M_RTABLE, len);
906 m_freem(m);
907 solock(so);
908
909 return (error);
910 }
911
912 int
rtm_output(struct rt_msghdr * rtm,struct rtentry ** prt,struct rt_addrinfo * info,uint8_t prio,unsigned int tableid)913 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
914 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
915 {
916 struct rtentry *rt = *prt;
917 struct ifnet *ifp = NULL;
918 int plen, newgate = 0, error = 0;
919
920 switch (rtm->rtm_type) {
921 case RTM_ADD:
922 if (info->rti_info[RTAX_GATEWAY] == NULL) {
923 error = EINVAL;
924 break;
925 }
926
927 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
928 if ((error = route_arp_conflict(rt, info))) {
929 rtfree(rt);
930 rt = NULL;
931 break;
932 }
933
934 /*
935 * We cannot go through a delete/create/insert cycle for
936 * cached route because this can lead to races in the
937 * receive path. Instead we update the L2 cache.
938 */
939 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) {
940 ifp = if_get(rt->rt_ifidx);
941 if (ifp == NULL) {
942 rtfree(rt);
943 rt = NULL;
944 error = ESRCH;
945 break;
946 }
947
948 goto change;
949 }
950
951 rtfree(rt);
952 rt = NULL;
953
954 NET_LOCK();
955 if ((error = rtm_getifa(info, tableid)) != 0) {
956 NET_UNLOCK();
957 break;
958 }
959 error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
960 NET_UNLOCK();
961 if (error == 0)
962 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
963 &rt->rt_rmx);
964 break;
965 case RTM_DELETE:
966 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
967 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
968 prio);
969 if (rt == NULL) {
970 error = ESRCH;
971 break;
972 }
973
974 /*
975 * If we got multipath routes, we require users to specify
976 * a matching gateway.
977 */
978 if (ISSET(rt->rt_flags, RTF_MPATH) &&
979 info->rti_info[RTAX_GATEWAY] == NULL) {
980 error = ESRCH;
981 break;
982 }
983
984 ifp = if_get(rt->rt_ifidx);
985 if (ifp == NULL) {
986 rtfree(rt);
987 rt = NULL;
988 error = ESRCH;
989 break;
990 }
991
992 /*
993 * Invalidate the cache of automagically created and
994 * referenced L2 entries to make sure that ``rt_gwroute''
995 * pointer stays valid for other CPUs.
996 */
997 if ((ISSET(rt->rt_flags, RTF_CACHED))) {
998 NET_LOCK();
999 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
1000 /* Reset the MTU of the gateway route. */
1001 rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
1002 route_cleargateway, rt);
1003 NET_UNLOCK();
1004 break;
1005 }
1006
1007 /*
1008 * Make sure that local routes are only modified by the
1009 * kernel.
1010 */
1011 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1012 error = EINVAL;
1013 break;
1014 }
1015
1016 rtfree(rt);
1017 rt = NULL;
1018
1019 NET_LOCK();
1020 error = rtrequest_delete(info, prio, ifp, &rt, tableid);
1021 NET_UNLOCK();
1022 break;
1023 case RTM_CHANGE:
1024 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1025 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1026 prio);
1027 /*
1028 * If we got multipath routes, we require users to specify
1029 * a matching gateway.
1030 */
1031 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
1032 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1033 rtfree(rt);
1034 rt = NULL;
1035 }
1036
1037 /*
1038 * If RTAX_GATEWAY is the argument we're trying to
1039 * change, try to find a compatible route.
1040 */
1041 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1042 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1043 info->rti_info[RTAX_NETMASK], NULL, prio);
1044 /* Ensure we don't pick a multipath one. */
1045 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1046 rtfree(rt);
1047 rt = NULL;
1048 }
1049 }
1050
1051 if (rt == NULL) {
1052 error = ESRCH;
1053 break;
1054 }
1055
1056 /*
1057 * Make sure that local routes are only modified by the
1058 * kernel.
1059 */
1060 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1061 error = EINVAL;
1062 break;
1063 }
1064
1065 ifp = if_get(rt->rt_ifidx);
1066 if (ifp == NULL) {
1067 rtfree(rt);
1068 rt = NULL;
1069 error = ESRCH;
1070 break;
1071 }
1072
1073 /*
1074 * RTM_CHANGE needs a perfect match.
1075 */
1076 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1077 info->rti_info[RTAX_NETMASK]);
1078 if (rt_plen(rt) != plen) {
1079 error = ESRCH;
1080 break;
1081 }
1082
1083 if (info->rti_info[RTAX_GATEWAY] != NULL)
1084 if (rt->rt_gateway == NULL ||
1085 bcmp(rt->rt_gateway,
1086 info->rti_info[RTAX_GATEWAY],
1087 info->rti_info[RTAX_GATEWAY]->sa_len)) {
1088 newgate = 1;
1089 }
1090 /*
1091 * Check reachable gateway before changing the route.
1092 * New gateway could require new ifaddr, ifp;
1093 * flags may also be different; ifp may be specified
1094 * by ll sockaddr when protocol address is ambiguous.
1095 */
1096 if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1097 info->rti_info[RTAX_IFA] != NULL) {
1098 struct ifaddr *ifa = NULL;
1099
1100 NET_LOCK();
1101 if ((error = rtm_getifa(info, tableid)) != 0) {
1102 NET_UNLOCK();
1103 break;
1104 }
1105 ifa = info->rti_ifa;
1106 if (rt->rt_ifa != ifa) {
1107 ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1108 ifafree(rt->rt_ifa);
1109
1110 rt->rt_ifa = ifaref(ifa);
1111 rt->rt_ifidx = ifa->ifa_ifp->if_index;
1112 /* recheck link state after ifp change */
1113 rt_if_linkstate_change(rt, ifa->ifa_ifp,
1114 tableid);
1115 }
1116 NET_UNLOCK();
1117 }
1118 change:
1119 if (info->rti_info[RTAX_GATEWAY] != NULL) {
1120 /* When updating the gateway, make sure it is valid. */
1121 if (!newgate && rt->rt_gateway->sa_family !=
1122 info->rti_info[RTAX_GATEWAY]->sa_family) {
1123 error = EINVAL;
1124 break;
1125 }
1126
1127 NET_LOCK();
1128 error = rt_setgate(rt,
1129 info->rti_info[RTAX_GATEWAY], tableid);
1130 NET_UNLOCK();
1131 if (error)
1132 break;
1133 }
1134 #ifdef MPLS
1135 if (rtm->rtm_flags & RTF_MPLS) {
1136 NET_LOCK();
1137 error = rt_mpls_set(rt,
1138 info->rti_info[RTAX_SRC], info->rti_mpls);
1139 NET_UNLOCK();
1140 if (error)
1141 break;
1142 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1143 NET_LOCK();
1144 /* if gateway changed remove MPLS information */
1145 rt_mpls_clear(rt);
1146 NET_UNLOCK();
1147 }
1148 #endif
1149
1150 #ifdef BFD
1151 if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1152 KERNEL_LOCK();
1153 error = bfdset(rt);
1154 KERNEL_UNLOCK();
1155 if (error)
1156 break;
1157 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1158 ISSET(rtm->rtm_fmask, RTF_BFD)) {
1159 KERNEL_LOCK();
1160 bfdclear(rt);
1161 KERNEL_UNLOCK();
1162 }
1163 #endif
1164
1165 NET_LOCK();
1166 /* Hack to allow some flags to be toggled */
1167 if (rtm->rtm_fmask) {
1168 /* MPLS flag it is set by rt_mpls_set() */
1169 rtm->rtm_fmask &= ~RTF_MPLS;
1170 rtm->rtm_flags &= ~RTF_MPLS;
1171 rt->rt_flags =
1172 (rt->rt_flags & ~rtm->rtm_fmask) |
1173 (rtm->rtm_flags & rtm->rtm_fmask);
1174 }
1175 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1176
1177 ifp->if_rtrequest(ifp, RTM_ADD, rt);
1178
1179 if (info->rti_info[RTAX_LABEL] != NULL) {
1180 const char *rtlabel = ((const struct sockaddr_rtlabel *)
1181 info->rti_info[RTAX_LABEL])->sr_label;
1182 rtlabel_unref(rt->rt_labelid);
1183 rt->rt_labelid = rtlabel_name2id(rtlabel);
1184 }
1185 if_group_routechange(info->rti_info[RTAX_DST],
1186 info->rti_info[RTAX_NETMASK]);
1187 rt->rt_locks &= ~(rtm->rtm_inits);
1188 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1189 NET_UNLOCK();
1190 break;
1191 case RTM_GET:
1192 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1193 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1194 prio);
1195 if (rt == NULL)
1196 error = ESRCH;
1197 break;
1198 }
1199
1200 if_put(ifp);
1201 *prt = rt;
1202 return (error);
1203 }
1204
1205 struct ifaddr *
ifa_ifwithroute(int flags,const struct sockaddr * dst,const struct sockaddr * gateway,unsigned int rtableid)1206 ifa_ifwithroute(int flags, const struct sockaddr *dst,
1207 const struct sockaddr *gateway, unsigned int rtableid)
1208 {
1209 struct ifaddr *ifa;
1210
1211 if ((flags & RTF_GATEWAY) == 0) {
1212 /*
1213 * If we are adding a route to an interface,
1214 * and the interface is a pt to pt link
1215 * we should search for the destination
1216 * as our clue to the interface. Otherwise
1217 * we can use the local address.
1218 */
1219 ifa = NULL;
1220 if (flags & RTF_HOST)
1221 ifa = ifa_ifwithdstaddr(dst, rtableid);
1222 if (ifa == NULL)
1223 ifa = ifa_ifwithaddr(gateway, rtableid);
1224 } else {
1225 /*
1226 * If we are adding a route to a remote net
1227 * or host, the gateway may still be on the
1228 * other end of a pt to pt link.
1229 */
1230 ifa = ifa_ifwithdstaddr(gateway, rtableid);
1231 }
1232 if (ifa == NULL) {
1233 if (gateway->sa_family == AF_LINK) {
1234 const struct sockaddr_dl *sdl;
1235 struct ifnet *ifp;
1236
1237 sdl = satosdl_const(gateway);
1238 ifp = if_get(sdl->sdl_index);
1239 if (ifp != NULL)
1240 ifa = ifaof_ifpforaddr(dst, ifp);
1241 if_put(ifp);
1242 } else {
1243 struct rtentry *rt;
1244
1245 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1246 if (rt != NULL)
1247 ifa = rt->rt_ifa;
1248 rtfree(rt);
1249 }
1250 }
1251 if (ifa == NULL)
1252 return (NULL);
1253 if (ifa->ifa_addr->sa_family != dst->sa_family) {
1254 struct ifaddr *oifa = ifa;
1255 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1256 if (ifa == NULL)
1257 ifa = oifa;
1258 }
1259 return (ifa);
1260 }
1261
1262 int
rtm_getifa(struct rt_addrinfo * info,unsigned int rtid)1263 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1264 {
1265 struct ifnet *ifp = NULL;
1266
1267 /*
1268 * The "returned" `ifa' is guaranteed to be alive only if
1269 * the NET_LOCK() is held.
1270 */
1271 NET_ASSERT_LOCKED();
1272
1273 /*
1274 * ifp may be specified by sockaddr_dl when protocol address
1275 * is ambiguous
1276 */
1277 if (info->rti_info[RTAX_IFP] != NULL) {
1278 const struct sockaddr_dl *sdl;
1279
1280 sdl = satosdl_const(info->rti_info[RTAX_IFP]);
1281 ifp = if_get(sdl->sdl_index);
1282 }
1283
1284 #ifdef IPSEC
1285 /*
1286 * If the destination is a PF_KEY address, we'll look
1287 * for the existence of a encap interface number or address
1288 * in the options list of the gateway. By default, we'll return
1289 * enc0.
1290 */
1291 if (info->rti_info[RTAX_DST] &&
1292 info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1293 info->rti_ifa = enc_getifa(rtid, 0);
1294 #endif
1295
1296 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1297 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1298
1299 if (info->rti_ifa == NULL) {
1300 const struct sockaddr *sa;
1301
1302 if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1303 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1304 sa = info->rti_info[RTAX_DST];
1305
1306 if (sa != NULL && ifp != NULL)
1307 info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1308 else if (info->rti_info[RTAX_DST] != NULL &&
1309 info->rti_info[RTAX_GATEWAY] != NULL)
1310 info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1311 info->rti_info[RTAX_DST],
1312 info->rti_info[RTAX_GATEWAY],
1313 rtid);
1314 else if (sa != NULL)
1315 info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1316 sa, sa, rtid);
1317 }
1318
1319 if_put(ifp);
1320
1321 if (info->rti_ifa == NULL)
1322 return (ENETUNREACH);
1323
1324 return (0);
1325 }
1326
1327 int
route_cleargateway(struct rtentry * rt,void * arg,unsigned int rtableid)1328 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1329 {
1330 struct rtentry *nhrt = arg;
1331
1332 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1333 !ISSET(rt->rt_locks, RTV_MTU))
1334 rt->rt_mtu = 0;
1335
1336 return (0);
1337 }
1338
1339 /*
1340 * Check if the user request to insert an ARP entry does not conflict
1341 * with existing ones.
1342 *
1343 * Only two entries are allowed for a given IP address: a private one
1344 * (priv) and a public one (pub).
1345 */
1346 int
route_arp_conflict(struct rtentry * rt,struct rt_addrinfo * info)1347 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1348 {
1349 int proxy = (info->rti_flags & RTF_ANNOUNCE);
1350
1351 if ((info->rti_flags & RTF_LLINFO) == 0 ||
1352 (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1353 return (0);
1354
1355 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1356 return (0);
1357
1358 /* If the entry is cached, it can be updated. */
1359 if (ISSET(rt->rt_flags, RTF_CACHED))
1360 return (0);
1361
1362 /*
1363 * Same destination, not cached and both "priv" or "pub" conflict.
1364 * If a second entry exists, it always conflict.
1365 */
1366 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1367 ISSET(rt->rt_flags, RTF_MPATH))
1368 return (EEXIST);
1369
1370 /* No conflict but an entry exist so we need to force mpath. */
1371 info->rti_flags |= RTF_MPATH;
1372 return (0);
1373 }
1374
1375 void
rtm_setmetrics(u_long which,const struct rt_metrics * in,struct rt_kmetrics * out)1376 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1377 struct rt_kmetrics *out)
1378 {
1379 int64_t expire;
1380
1381 if (which & RTV_MTU)
1382 out->rmx_mtu = in->rmx_mtu;
1383 if (which & RTV_EXPIRE) {
1384 expire = in->rmx_expire;
1385 if (expire != 0) {
1386 expire -= gettime();
1387 expire += getuptime();
1388 }
1389
1390 out->rmx_expire = expire;
1391 }
1392 }
1393
1394 void
rtm_getmetrics(const struct rtentry * rt,struct rt_metrics * out)1395 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
1396 {
1397 const struct rt_kmetrics *in = &rt->rt_rmx;
1398 int64_t expire;
1399
1400 expire = in->rmx_expire;
1401 if (expire == 0)
1402 expire = rt_timer_get_expire(rt);
1403 if (expire != 0) {
1404 expire -= getuptime();
1405 expire += gettime();
1406 }
1407
1408 bzero(out, sizeof(*out));
1409 out->rmx_locks = in->rmx_locks;
1410 out->rmx_mtu = in->rmx_mtu;
1411 out->rmx_expire = expire;
1412 out->rmx_pksent = in->rmx_pksent;
1413 }
1414
/*
 * ROUNDUP(a): `a' rounded up to a multiple of sizeof(long); a value that
 * is not positive yields sizeof(long).
 * ADVANCE(x, n): move `x' forward by the rounded-up sa_len of `n'.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + sizeof(long) - 1) & ~(sizeof(long) - 1)) : \
	    sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1418
1419 int
rtm_xaddrs(caddr_t cp,caddr_t cplim,struct rt_addrinfo * rtinfo)1420 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1421 {
1422 int i;
1423
1424 /*
1425 * Parse address bits, split address storage in chunks, and
1426 * set info pointers. Use sa_len for traversing the memory
1427 * and check that we stay within in the limit.
1428 */
1429 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1430 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1431 struct sockaddr *sa;
1432
1433 if ((rtinfo->rti_addrs & (1U << i)) == 0)
1434 continue;
1435 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1436 return (EINVAL);
1437 sa = (struct sockaddr *)cp;
1438 if (cp + sa->sa_len > cplim)
1439 return (EINVAL);
1440 rtinfo->rti_info[i] = sa;
1441 ADVANCE(cp, sa);
1442 }
1443 /*
1444 * Check that the address family is suitable for the route address
1445 * type. Check that each address has a size that fits its family
1446 * and its length is within the size. Strings within addresses must
1447 * be NUL terminated.
1448 */
1449 for (i = 0; i < RTAX_MAX; i++) {
1450 const struct sockaddr *sa;
1451 size_t len, maxlen, size;
1452
1453 sa = rtinfo->rti_info[i];
1454 if (sa == NULL)
1455 continue;
1456 maxlen = size = 0;
1457 switch (i) {
1458 case RTAX_DST:
1459 case RTAX_GATEWAY:
1460 case RTAX_SRC:
1461 switch (sa->sa_family) {
1462 case AF_INET:
1463 size = sizeof(struct sockaddr_in);
1464 break;
1465 case AF_LINK:
1466 size = sizeof(struct sockaddr_dl);
1467 break;
1468 #ifdef INET6
1469 case AF_INET6:
1470 size = sizeof(struct sockaddr_in6);
1471 break;
1472 #endif
1473 #ifdef MPLS
1474 case AF_MPLS:
1475 size = sizeof(struct sockaddr_mpls);
1476 break;
1477 #endif
1478 }
1479 break;
1480 case RTAX_IFP:
1481 if (sa->sa_family != AF_LINK)
1482 return (EAFNOSUPPORT);
1483 /*
1484 * XXX Should be sizeof(struct sockaddr_dl), but
1485 * route(8) has a bug and provides less memory.
1486 * arp(8) has another bug and uses sizeof pointer.
1487 */
1488 size = 4;
1489 break;
1490 case RTAX_IFA:
1491 switch (sa->sa_family) {
1492 case AF_INET:
1493 size = sizeof(struct sockaddr_in);
1494 break;
1495 #ifdef INET6
1496 case AF_INET6:
1497 size = sizeof(struct sockaddr_in6);
1498 break;
1499 #endif
1500 default:
1501 return (EAFNOSUPPORT);
1502 }
1503 break;
1504 case RTAX_LABEL:
1505 if (sa->sa_family != AF_UNSPEC)
1506 return (EAFNOSUPPORT);
1507 maxlen = RTLABEL_LEN;
1508 size = sizeof(struct sockaddr_rtlabel);
1509 break;
1510 #ifdef BFD
1511 case RTAX_BFD:
1512 if (sa->sa_family != AF_UNSPEC)
1513 return (EAFNOSUPPORT);
1514 size = sizeof(struct sockaddr_bfd);
1515 break;
1516 #endif
1517 case RTAX_DNS:
1518 /* more validation in rtm_validate_proposal */
1519 if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1520 return (EINVAL);
1521 if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1522 sr_dns))
1523 return (EINVAL);
1524 switch (sa->sa_family) {
1525 case AF_INET:
1526 #ifdef INET6
1527 case AF_INET6:
1528 #endif
1529 break;
1530 default:
1531 return (EAFNOSUPPORT);
1532 }
1533 break;
1534 case RTAX_STATIC:
1535 switch (sa->sa_family) {
1536 case AF_INET:
1537 #ifdef INET6
1538 case AF_INET6:
1539 #endif
1540 break;
1541 default:
1542 return (EAFNOSUPPORT);
1543 }
1544 maxlen = RTSTATIC_LEN;
1545 size = sizeof(struct sockaddr_rtstatic);
1546 break;
1547 case RTAX_SEARCH:
1548 if (sa->sa_family != AF_UNSPEC)
1549 return (EAFNOSUPPORT);
1550 maxlen = RTSEARCH_LEN;
1551 size = sizeof(struct sockaddr_rtsearch);
1552 break;
1553 }
1554 if (size) {
1555 /* memory for the full struct must be provided */
1556 if (sa->sa_len < size)
1557 return (EINVAL);
1558 }
1559 if (maxlen) {
1560 /* this should not happen */
1561 if (2 + maxlen > size)
1562 return (EINVAL);
1563 /* strings must be NUL terminated within the struct */
1564 len = strnlen(sa->sa_data, maxlen);
1565 if (len >= maxlen || 2 + len >= sa->sa_len)
1566 return (EINVAL);
1567 break;
1568 }
1569 }
1570 return (0);
1571 }
1572
1573 struct mbuf *
rtm_msg1(int type,struct rt_addrinfo * rtinfo)1574 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1575 {
1576 struct rt_msghdr *rtm;
1577 struct mbuf *m;
1578 int i;
1579 const struct sockaddr *sa;
1580 int len, dlen, hlen;
1581
1582 switch (type) {
1583 case RTM_DELADDR:
1584 case RTM_NEWADDR:
1585 hlen = sizeof(struct ifa_msghdr);
1586 break;
1587 case RTM_IFINFO:
1588 hlen = sizeof(struct if_msghdr);
1589 break;
1590 case RTM_IFANNOUNCE:
1591 hlen = sizeof(struct if_announcemsghdr);
1592 break;
1593 #ifdef BFD
1594 case RTM_BFD:
1595 hlen = sizeof(struct bfd_msghdr);
1596 break;
1597 #endif
1598 case RTM_80211INFO:
1599 hlen = sizeof(struct if_ieee80211_msghdr);
1600 break;
1601 default:
1602 hlen = sizeof(struct rt_msghdr);
1603 break;
1604 }
1605 len = hlen;
1606 for (i = 0; i < RTAX_MAX; i++) {
1607 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1608 continue;
1609 len += ROUNDUP(sa->sa_len);
1610 }
1611 if (len > MCLBYTES)
1612 panic("rtm_msg1");
1613 m = m_gethdr(M_DONTWAIT, MT_DATA);
1614 if (m && len > MHLEN) {
1615 MCLGET(m, M_DONTWAIT);
1616 if ((m->m_flags & M_EXT) == 0) {
1617 m_free(m);
1618 m = NULL;
1619 }
1620 }
1621 if (m == NULL)
1622 return (m);
1623 m->m_pkthdr.len = m->m_len = len;
1624 m->m_pkthdr.ph_ifidx = 0;
1625 rtm = mtod(m, struct rt_msghdr *);
1626 bzero(rtm, len);
1627 len = hlen;
1628 for (i = 0; i < RTAX_MAX; i++) {
1629 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1630 continue;
1631 rtinfo->rti_addrs |= (1U << i);
1632 dlen = ROUNDUP(sa->sa_len);
1633 if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) {
1634 m_freem(m);
1635 return (NULL);
1636 }
1637 len += dlen;
1638 }
1639 rtm->rtm_msglen = len;
1640 rtm->rtm_hdrlen = hlen;
1641 rtm->rtm_version = RTM_VERSION;
1642 rtm->rtm_type = type;
1643 return (m);
1644 }
1645
1646 int
rtm_msg2(int type,int vers,struct rt_addrinfo * rtinfo,caddr_t cp,struct walkarg * w)1647 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1648 struct walkarg *w)
1649 {
1650 int i;
1651 int len, dlen, hlen, second_time = 0;
1652 caddr_t cp0;
1653
1654 rtinfo->rti_addrs = 0;
1655 again:
1656 switch (type) {
1657 case RTM_DELADDR:
1658 case RTM_NEWADDR:
1659 len = sizeof(struct ifa_msghdr);
1660 break;
1661 case RTM_IFINFO:
1662 len = sizeof(struct if_msghdr);
1663 break;
1664 default:
1665 len = sizeof(struct rt_msghdr);
1666 break;
1667 }
1668 hlen = len;
1669 if ((cp0 = cp) != NULL)
1670 cp += len;
1671 for (i = 0; i < RTAX_MAX; i++) {
1672 const struct sockaddr *sa;
1673
1674 if ((sa = rtinfo->rti_info[i]) == NULL)
1675 continue;
1676 rtinfo->rti_addrs |= (1U << i);
1677 dlen = ROUNDUP(sa->sa_len);
1678 if (cp) {
1679 bcopy(sa, cp, sa->sa_len);
1680 bzero(cp + sa->sa_len, dlen - sa->sa_len);
1681 cp += dlen;
1682 }
1683 len += dlen;
1684 }
1685 /* align message length to the next natural boundary */
1686 len = ALIGN(len);
1687 if (cp == 0 && w != NULL && !second_time) {
1688 w->w_needed += len;
1689 if (w->w_needed <= w->w_given && w->w_where) {
1690 if (w->w_tmemsize < len) {
1691 free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1692 w->w_tmem = malloc(len, M_RTABLE,
1693 M_NOWAIT | M_ZERO);
1694 if (w->w_tmem)
1695 w->w_tmemsize = len;
1696 }
1697 if (w->w_tmem) {
1698 cp = w->w_tmem;
1699 second_time = 1;
1700 goto again;
1701 } else
1702 w->w_where = 0;
1703 }
1704 }
1705 if (cp && w) /* clear the message header */
1706 bzero(cp0, hlen);
1707
1708 if (cp) {
1709 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1710
1711 rtm->rtm_version = RTM_VERSION;
1712 rtm->rtm_type = type;
1713 rtm->rtm_msglen = len;
1714 rtm->rtm_hdrlen = hlen;
1715 }
1716 return (len);
1717 }
1718
1719 void
rtm_send(struct rtentry * rt,int cmd,int error,unsigned int rtableid)1720 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1721 {
1722 struct rt_addrinfo info;
1723 struct ifnet *ifp;
1724 struct sockaddr_rtlabel sa_rl;
1725 struct sockaddr_in6 sa_mask;
1726
1727 memset(&info, 0, sizeof(info));
1728 info.rti_info[RTAX_DST] = rt_key(rt);
1729 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1730 if (!ISSET(rt->rt_flags, RTF_HOST))
1731 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1732 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1733 ifp = if_get(rt->rt_ifidx);
1734 if (ifp != NULL) {
1735 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1736 info.rti_info[RTAX_IFA] = rtable_getsource(rtableid,
1737 info.rti_info[RTAX_DST]->sa_family);
1738 if (info.rti_info[RTAX_IFA] == NULL)
1739 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1740 }
1741
1742 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1743 rtableid);
1744 if_put(ifp);
1745 }
1746
1747 /*
1748 * This routine is called to generate a message from the routing
1749 * socket indicating that a redirect has occurred, a routing lookup
1750 * has failed, or that a protocol has detected timeouts to a particular
1751 * destination.
1752 */
1753 void
rtm_miss(int type,struct rt_addrinfo * rtinfo,int flags,uint8_t prio,u_int ifidx,int error,u_int tableid)1754 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1755 u_int ifidx, int error, u_int tableid)
1756 {
1757 struct rt_msghdr *rtm;
1758 struct mbuf *m;
1759 const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1760
1761 if (rtptable.rtp_count == 0)
1762 return;
1763 m = rtm_msg1(type, rtinfo);
1764 if (m == NULL)
1765 return;
1766 rtm = mtod(m, struct rt_msghdr *);
1767 rtm->rtm_flags = RTF_DONE | flags;
1768 rtm->rtm_priority = prio;
1769 rtm->rtm_errno = error;
1770 rtm->rtm_tableid = tableid;
1771 rtm->rtm_addrs = rtinfo->rti_addrs;
1772 rtm->rtm_index = ifidx;
1773 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1774 }
1775
1776 /*
1777 * This routine is called to generate a message from the routing
1778 * socket indicating that the status of a network interface has changed.
1779 */
1780 void
rtm_ifchg(struct ifnet * ifp)1781 rtm_ifchg(struct ifnet *ifp)
1782 {
1783 struct rt_addrinfo info;
1784 struct if_msghdr *ifm;
1785 struct mbuf *m;
1786
1787 if (rtptable.rtp_count == 0)
1788 return;
1789 memset(&info, 0, sizeof(info));
1790 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1791 m = rtm_msg1(RTM_IFINFO, &info);
1792 if (m == NULL)
1793 return;
1794 ifm = mtod(m, struct if_msghdr *);
1795 ifm->ifm_index = ifp->if_index;
1796 ifm->ifm_tableid = ifp->if_rdomain;
1797 ifm->ifm_flags = ifp->if_flags;
1798 ifm->ifm_xflags = ifp->if_xflags;
1799 if_getdata(ifp, &ifm->ifm_data);
1800 ifm->ifm_addrs = info.rti_addrs;
1801 route_input(m, NULL, AF_UNSPEC);
1802 }
1803
1804 /*
1805 * This is called to generate messages from the routing socket
1806 * indicating a network interface has had addresses associated with it.
1807 * if we ever reverse the logic and replace messages TO the routing
1808 * socket indicate a request to configure interfaces, then it will
1809 * be unnecessary as the routing socket will automatically generate
1810 * copies of it.
1811 */
1812 void
rtm_addr(int cmd,struct ifaddr * ifa)1813 rtm_addr(int cmd, struct ifaddr *ifa)
1814 {
1815 struct ifnet *ifp = ifa->ifa_ifp;
1816 struct mbuf *m;
1817 struct rt_addrinfo info;
1818 struct ifa_msghdr *ifam;
1819
1820 if (rtptable.rtp_count == 0)
1821 return;
1822
1823 memset(&info, 0, sizeof(info));
1824 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1825 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1826 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1827 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1828 if ((m = rtm_msg1(cmd, &info)) == NULL)
1829 return;
1830 ifam = mtod(m, struct ifa_msghdr *);
1831 ifam->ifam_index = ifp->if_index;
1832 ifam->ifam_metric = ifa->ifa_metric;
1833 ifam->ifam_flags = ifa->ifa_flags;
1834 ifam->ifam_addrs = info.rti_addrs;
1835 ifam->ifam_tableid = ifp->if_rdomain;
1836
1837 route_input(m, NULL,
1838 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1839 }
1840
1841 /*
1842 * This is called to generate routing socket messages indicating
1843 * network interface arrival and departure.
1844 */
1845 void
rtm_ifannounce(struct ifnet * ifp,int what)1846 rtm_ifannounce(struct ifnet *ifp, int what)
1847 {
1848 struct if_announcemsghdr *ifan;
1849 struct mbuf *m;
1850
1851 if (rtptable.rtp_count == 0)
1852 return;
1853 m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1854 if (m == NULL)
1855 return;
1856 ifan = mtod(m, struct if_announcemsghdr *);
1857 ifan->ifan_index = ifp->if_index;
1858 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1859 ifan->ifan_what = what;
1860 route_input(m, NULL, AF_UNSPEC);
1861 }
1862
1863 #ifdef BFD
1864 /*
1865 * This is used to generate routing socket messages indicating
1866 * the state of a BFD session.
1867 */
1868 void
rtm_bfd(struct bfd_config * bfd)1869 rtm_bfd(struct bfd_config *bfd)
1870 {
1871 struct bfd_msghdr *bfdm;
1872 struct sockaddr_bfd sa_bfd;
1873 struct mbuf *m;
1874 struct rt_addrinfo info;
1875
1876 if (rtptable.rtp_count == 0)
1877 return;
1878 memset(&info, 0, sizeof(info));
1879 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);
1880 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;
1881
1882 m = rtm_msg1(RTM_BFD, &info);
1883 if (m == NULL)
1884 return;
1885 bfdm = mtod(m, struct bfd_msghdr *);
1886 bfdm->bm_addrs = info.rti_addrs;
1887
1888 KERNEL_ASSERT_LOCKED();
1889 bfd2sa(bfd->bc_rt, &sa_bfd);
1890 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd));
1891
1892 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
1893 }
1894 #endif /* BFD */
1895
1896 /*
1897 * This is used to generate routing socket messages indicating
1898 * the state of an ieee80211 interface.
1899 */
1900 void
rtm_80211info(struct ifnet * ifp,struct if_ieee80211_data * ifie)1901 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1902 {
1903 struct if_ieee80211_msghdr *ifim;
1904 struct mbuf *m;
1905
1906 if (rtptable.rtp_count == 0)
1907 return;
1908 m = rtm_msg1(RTM_80211INFO, NULL);
1909 if (m == NULL)
1910 return;
1911 ifim = mtod(m, struct if_ieee80211_msghdr *);
1912 ifim->ifim_index = ifp->if_index;
1913 ifim->ifim_tableid = ifp->if_rdomain;
1914
1915 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1916 route_input(m, NULL, AF_UNSPEC);
1917 }
1918
1919 /*
1920 * This is used to generate routing socket messages indicating
1921 * the address selection proposal from an interface.
1922 */
1923 void
rtm_proposal(struct ifnet * ifp,struct rt_addrinfo * rtinfo,int flags,uint8_t prio)1924 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1925 uint8_t prio)
1926 {
1927 struct rt_msghdr *rtm;
1928 struct mbuf *m;
1929
1930 m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1931 if (m == NULL)
1932 return;
1933 rtm = mtod(m, struct rt_msghdr *);
1934 rtm->rtm_flags = RTF_DONE | flags;
1935 rtm->rtm_priority = prio;
1936 rtm->rtm_tableid = ifp->if_rdomain;
1937 rtm->rtm_index = ifp->if_index;
1938 rtm->rtm_addrs = rtinfo->rti_addrs;
1939
1940 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1941 }
1942
1943 /*
1944 * This is used in dumping the kernel table via sysctl().
1945 */
1946 int
sysctl_dumpentry(struct rtentry * rt,void * v,unsigned int id)1947 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1948 {
1949 struct walkarg *w = v;
1950 int error = 0, size;
1951 struct rt_addrinfo info;
1952 struct ifnet *ifp;
1953 #ifdef BFD
1954 struct sockaddr_bfd sa_bfd;
1955 #endif
1956 struct sockaddr_rtlabel sa_rl;
1957 struct sockaddr_in6 sa_mask;
1958
1959 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1960 return 0;
1961 if (w->w_op == NET_RT_DUMP && w->w_arg) {
1962 u_int8_t prio = w->w_arg & RTP_MASK;
1963 if (w->w_arg < 0) {
1964 prio = (-w->w_arg) & RTP_MASK;
1965 /* Show all routes that are not this priority */
1966 if (prio == (rt->rt_priority & RTP_MASK))
1967 return 0;
1968 } else {
1969 if (prio != (rt->rt_priority & RTP_MASK) &&
1970 prio != RTP_ANY)
1971 return 0;
1972 }
1973 }
1974 bzero(&info, sizeof(info));
1975 info.rti_info[RTAX_DST] = rt_key(rt);
1976 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1977 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1978 ifp = if_get(rt->rt_ifidx);
1979 if (ifp != NULL) {
1980 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1981 info.rti_info[RTAX_IFA] =
1982 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1983 if (info.rti_info[RTAX_IFA] == NULL)
1984 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1985 if (ifp->if_flags & IFF_POINTOPOINT)
1986 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1987 }
1988 if_put(ifp);
1989 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1990 #ifdef BFD
1991 if (rt->rt_flags & RTF_BFD) {
1992 KERNEL_ASSERT_LOCKED();
1993 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1994 }
1995 #endif
1996 #ifdef MPLS
1997 if (rt->rt_flags & RTF_MPLS) {
1998 struct sockaddr_mpls sa_mpls;
1999
2000 bzero(&sa_mpls, sizeof(sa_mpls));
2001 sa_mpls.smpls_family = AF_MPLS;
2002 sa_mpls.smpls_len = sizeof(sa_mpls);
2003 sa_mpls.smpls_label = ((struct rt_mpls *)
2004 rt->rt_llinfo)->mpls_label;
2005 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
2006 info.rti_mpls = ((struct rt_mpls *)
2007 rt->rt_llinfo)->mpls_operation;
2008 }
2009 #endif
2010
2011 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
2012 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
2013 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
2014
2015 rtm->rtm_pid = curproc->p_p->ps_pid;
2016 rtm->rtm_flags = RTF_DONE | rt->rt_flags;
2017 rtm->rtm_priority = rt->rt_priority & RTP_MASK;
2018 rtm_getmetrics(rt, &rtm->rtm_rmx);
2019 /* Do not account the routing table's reference. */
2020 rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1;
2021 rtm->rtm_index = rt->rt_ifidx;
2022 rtm->rtm_addrs = info.rti_addrs;
2023 rtm->rtm_tableid = id;
2024 #ifdef MPLS
2025 rtm->rtm_mpls = info.rti_mpls;
2026 #endif
2027 if ((error = copyout(rtm, w->w_where, size)) != 0)
2028 w->w_where = NULL;
2029 else
2030 w->w_where += size;
2031 }
2032 return (error);
2033 }
2034
2035 int
sysctl_iflist(int af,struct walkarg * w)2036 sysctl_iflist(int af, struct walkarg *w)
2037 {
2038 struct ifnet *ifp;
2039 struct ifaddr *ifa;
2040 struct rt_addrinfo info;
2041 int len, error = 0;
2042
2043 bzero(&info, sizeof(info));
2044 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2045 if (w->w_arg && w->w_arg != ifp->if_index)
2046 continue;
2047 /* Copy the link-layer address first */
2048 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
2049 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
2050 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
2051 struct if_msghdr *ifm;
2052
2053 ifm = (struct if_msghdr *)w->w_tmem;
2054 ifm->ifm_index = ifp->if_index;
2055 ifm->ifm_tableid = ifp->if_rdomain;
2056 ifm->ifm_flags = ifp->if_flags;
2057 if_getdata(ifp, &ifm->ifm_data);
2058 ifm->ifm_addrs = info.rti_addrs;
2059 error = copyout(ifm, w->w_where, len);
2060 if (error)
2061 return (error);
2062 w->w_where += len;
2063 }
2064 info.rti_info[RTAX_IFP] = NULL;
2065 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2066 KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
2067 if (af && af != ifa->ifa_addr->sa_family)
2068 continue;
2069 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2070 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2071 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2072 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
2073 if (w->w_where && w->w_tmem &&
2074 w->w_needed <= w->w_given) {
2075 struct ifa_msghdr *ifam;
2076
2077 ifam = (struct ifa_msghdr *)w->w_tmem;
2078 ifam->ifam_index = ifa->ifa_ifp->if_index;
2079 ifam->ifam_flags = ifa->ifa_flags;
2080 ifam->ifam_metric = ifa->ifa_metric;
2081 ifam->ifam_addrs = info.rti_addrs;
2082 error = copyout(w->w_tmem, w->w_where, len);
2083 if (error)
2084 return (error);
2085 w->w_where += len;
2086 }
2087 }
2088 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2089 info.rti_info[RTAX_BRD] = NULL;
2090 }
2091 return (0);
2092 }
2093
2094 int
sysctl_ifnames(struct walkarg * w)2095 sysctl_ifnames(struct walkarg *w)
2096 {
2097 struct if_nameindex_msg ifn;
2098 struct ifnet *ifp;
2099 int error = 0;
2100
2101 /* XXX ignore tableid for now */
2102 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2103 if (w->w_arg && w->w_arg != ifp->if_index)
2104 continue;
2105 w->w_needed += sizeof(ifn);
2106 if (w->w_where && w->w_needed <= w->w_given) {
2107
2108 memset(&ifn, 0, sizeof(ifn));
2109 ifn.if_index = ifp->if_index;
2110 strlcpy(ifn.if_name, ifp->if_xname,
2111 sizeof(ifn.if_name));
2112 error = copyout(&ifn, w->w_where, sizeof(ifn));
2113 if (error)
2114 return (error);
2115 w->w_where += sizeof(ifn);
2116 }
2117 }
2118
2119 return (0);
2120 }
2121
2122 int
sysctl_source(int af,u_int tableid,struct walkarg * w)2123 sysctl_source(int af, u_int tableid, struct walkarg *w)
2124 {
2125 struct sockaddr *sa;
2126 int size, error = 0;
2127
2128 sa = rtable_getsource(tableid, af);
2129 if (sa) {
2130 switch (sa->sa_family) {
2131 case AF_INET:
2132 size = sizeof(struct sockaddr_in);
2133 break;
2134 #ifdef INET6
2135 case AF_INET6:
2136 size = sizeof(struct sockaddr_in6);
2137 break;
2138 #endif
2139 default:
2140 return (0);
2141 }
2142 w->w_needed += size;
2143 if (w->w_where && w->w_needed <= w->w_given) {
2144 if ((error = copyout(sa, w->w_where, size)))
2145 return (error);
2146 w->w_where += size;
2147 }
2148 }
2149 return (0);
2150 }
2151
2152 int
sysctl_rtable(int * name,u_int namelen,void * where,size_t * given,void * new,size_t newlen)2153 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2154 size_t newlen)
2155 {
2156 int i, error = EINVAL;
2157 u_char af;
2158 struct walkarg w;
2159 struct rt_tableinfo tableinfo;
2160 u_int tableid = 0;
2161
2162 if (new)
2163 return (EPERM);
2164 if (namelen < 3 || namelen > 4)
2165 return (EINVAL);
2166 af = name[0];
2167 bzero(&w, sizeof(w));
2168 w.w_where = where;
2169 w.w_given = *given;
2170 w.w_op = name[1];
2171 w.w_arg = name[2];
2172
2173 if (namelen == 4) {
2174 tableid = name[3];
2175 if (!rtable_exists(tableid))
2176 return (ENOENT);
2177 } else
2178 tableid = curproc->p_p->ps_rtableid;
2179
2180 switch (w.w_op) {
2181 case NET_RT_DUMP:
2182 case NET_RT_FLAGS:
2183 NET_LOCK_SHARED();
2184 for (i = 1; i <= AF_MAX; i++) {
2185 if (af != 0 && af != i)
2186 continue;
2187
2188 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2189 &w);
2190 if (error == EAFNOSUPPORT)
2191 error = 0;
2192 if (error)
2193 break;
2194 }
2195 NET_UNLOCK_SHARED();
2196 break;
2197
2198 case NET_RT_IFLIST:
2199 NET_LOCK_SHARED();
2200 error = sysctl_iflist(af, &w);
2201 NET_UNLOCK_SHARED();
2202 break;
2203
2204 case NET_RT_STATS:
2205 return (sysctl_rtable_rtstat(where, given, new));
2206 case NET_RT_TABLE:
2207 tableid = w.w_arg;
2208 if (!rtable_exists(tableid))
2209 return (ENOENT);
2210 memset(&tableinfo, 0, sizeof tableinfo);
2211 tableinfo.rti_tableid = tableid;
2212 tableinfo.rti_domainid = rtable_l2(tableid);
2213 error = sysctl_rdstruct(where, given, new,
2214 &tableinfo, sizeof(tableinfo));
2215 return (error);
2216 case NET_RT_IFNAMES:
2217 NET_LOCK_SHARED();
2218 error = sysctl_ifnames(&w);
2219 NET_UNLOCK_SHARED();
2220 break;
2221 case NET_RT_SOURCE:
2222 tableid = w.w_arg;
2223 if (!rtable_exists(tableid))
2224 return (ENOENT);
2225 NET_LOCK_SHARED();
2226 for (i = 1; i <= AF_MAX; i++) {
2227 if (af != 0 && af != i)
2228 continue;
2229
2230 error = sysctl_source(i, tableid, &w);
2231 if (error == EAFNOSUPPORT)
2232 error = 0;
2233 if (error)
2234 break;
2235 }
2236 NET_UNLOCK_SHARED();
2237 break;
2238 }
2239 free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2240 if (where) {
2241 *given = w.w_where - (caddr_t)where;
2242 if (w.w_needed > w.w_given)
2243 return (ENOMEM);
2244 } else if (w.w_needed == 0) {
2245 *given = 0;
2246 } else {
2247 *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024),
2248 PAGE_SIZE);
2249 }
2250 return (error);
2251 }
2252
2253 int
sysctl_rtable_rtstat(void * oldp,size_t * oldlenp,void * newp)2254 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2255 {
2256 extern struct cpumem *rtcounters;
2257 uint64_t counters[rts_ncounters];
2258 struct rtstat rtstat;
2259 uint32_t *words = (uint32_t *)&rtstat;
2260 int i;
2261
2262 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2263 memset(&rtstat, 0, sizeof rtstat);
2264 counters_read(rtcounters, counters, nitems(counters), NULL);
2265
2266 for (i = 0; i < nitems(counters); i++)
2267 words[i] = (uint32_t)counters[i];
2268
2269 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2270 }
2271
2272 int
rtm_validate_proposal(struct rt_addrinfo * info)2273 rtm_validate_proposal(struct rt_addrinfo *info)
2274 {
2275 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2276 RTA_SEARCH)) {
2277 return -1;
2278 }
2279
2280 if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2281 const struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2282 if (sa == NULL)
2283 return -1;
2284 switch (sa->sa_family) {
2285 case AF_INET:
2286 if (sa->sa_len != sizeof(struct sockaddr_in))
2287 return -1;
2288 break;
2289 case AF_INET6:
2290 if (sa->sa_len != sizeof(struct sockaddr_in6))
2291 return -1;
2292 break;
2293 default:
2294 return -1;
2295 }
2296 }
2297
2298 if (ISSET(info->rti_addrs, RTA_IFA)) {
2299 const struct sockaddr *sa = info->rti_info[RTAX_IFA];
2300 if (sa == NULL)
2301 return -1;
2302 switch (sa->sa_family) {
2303 case AF_INET:
2304 if (sa->sa_len != sizeof(struct sockaddr_in))
2305 return -1;
2306 break;
2307 case AF_INET6:
2308 if (sa->sa_len != sizeof(struct sockaddr_in6))
2309 return -1;
2310 break;
2311 default:
2312 return -1;
2313 }
2314 }
2315
2316 if (ISSET(info->rti_addrs, RTA_DNS)) {
2317 const struct sockaddr_rtdns *rtdns =
2318 (const struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2319 if (rtdns == NULL)
2320 return -1;
2321 if (rtdns->sr_len > sizeof(*rtdns))
2322 return -1;
2323 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2324 return -1;
2325 switch (rtdns->sr_family) {
2326 case AF_INET:
2327 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2328 sr_dns)) % sizeof(struct in_addr) != 0)
2329 return -1;
2330 break;
2331 #ifdef INET6
2332 case AF_INET6:
2333 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2334 sr_dns)) % sizeof(struct in6_addr) != 0)
2335 return -1;
2336 break;
2337 #endif
2338 default:
2339 return -1;
2340 }
2341 }
2342
2343 if (ISSET(info->rti_addrs, RTA_STATIC)) {
2344 const struct sockaddr_rtstatic *rtstatic = (const struct
2345 sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2346 if (rtstatic == NULL)
2347 return -1;
2348 if (rtstatic->sr_len > sizeof(*rtstatic))
2349 return -1;
2350 if (rtstatic->sr_len <=
2351 offsetof(struct sockaddr_rtstatic, sr_static))
2352 return -1;
2353 }
2354
2355 if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2356 const struct sockaddr_rtsearch *rtsearch = (const struct
2357 sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2358 if (rtsearch == NULL)
2359 return -1;
2360 if (rtsearch->sr_len > sizeof(*rtsearch))
2361 return -1;
2362 if (rtsearch->sr_len <=
2363 offsetof(struct sockaddr_rtsearch, sr_search))
2364 return -1;
2365 }
2366
2367 return 0;
2368 }
2369
2370 int
rt_setsource(unsigned int rtableid,const struct sockaddr * src)2371 rt_setsource(unsigned int rtableid, const struct sockaddr *src)
2372 {
2373 struct ifaddr *ifa;
2374 /*
2375 * If source address is 0.0.0.0 or ::
2376 * use automatic source selection
2377 */
2378 switch(src->sa_family) {
2379 case AF_INET:
2380 if(satosin_const(src)->sin_addr.s_addr == INADDR_ANY) {
2381 rtable_setsource(rtableid, AF_INET, NULL);
2382 return (0);
2383 }
2384 break;
2385 #ifdef INET6
2386 case AF_INET6:
2387 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6_const(src)->sin6_addr)) {
2388 rtable_setsource(rtableid, AF_INET6, NULL);
2389 return (0);
2390 }
2391 break;
2392 #endif
2393 default:
2394 return (EAFNOSUPPORT);
2395 }
2396
2397 /*
2398 * Check if source address is assigned to an interface in the
2399 * same rdomain
2400 */
2401 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL)
2402 return (EINVAL);
2403
2404 return rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr);
2405 }
2406
2407 /*
2408 * Definitions of protocols supported in the ROUTE domain.
2409 */
2410
2411 const struct pr_usrreqs route_usrreqs = {
2412 .pru_attach = route_attach,
2413 .pru_detach = route_detach,
2414 .pru_disconnect = route_disconnect,
2415 .pru_shutdown = route_shutdown,
2416 .pru_rcvd = route_rcvd,
2417 .pru_send = route_send,
2418 .pru_sockaddr = route_sockaddr,
2419 .pru_peeraddr = route_peeraddr,
2420 };
2421
2422 const struct protosw routesw[] = {
2423 {
2424 .pr_type = SOCK_RAW,
2425 .pr_domain = &routedomain,
2426 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2427 .pr_ctloutput = route_ctloutput,
2428 .pr_usrreqs = &route_usrreqs,
2429 .pr_init = route_prinit,
2430 .pr_sysctl = sysctl_rtable
2431 }
2432 };
2433
2434 const struct domain routedomain = {
2435 .dom_family = PF_ROUTE,
2436 .dom_name = "route",
2437 .dom_init = route_init,
2438 .dom_protosw = routesw,
2439 .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2440 };
2441