1 /* $OpenBSD: rtsock.c,v 1.375 2024/07/12 17:20:18 mvs Exp $ */
2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1988, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95
62 */
63
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80
81 #include <netinet/in.h>
82
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97
98 #define ROUTESNDQ 8192
99 #define ROUTERCVQ 8192
100
101 const struct sockaddr route_src = { 2, PF_ROUTE, };
102
103 struct walkarg {
104 int w_op, w_arg, w_tmemsize;
105 size_t w_given, w_needed;
106 caddr_t w_where, w_tmem;
107 };
108
109 void route_prinit(void);
110 void rcb_ref(void *, void *);
111 void rcb_unref(void *, void *);
112 int route_output(struct mbuf *, struct socket *);
113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *);
114 int route_attach(struct socket *, int, int);
115 int route_detach(struct socket *);
116 int route_disconnect(struct socket *);
117 int route_shutdown(struct socket *);
118 void route_rcvd(struct socket *);
119 int route_send(struct socket *, struct mbuf *, struct mbuf *,
120 struct mbuf *);
121 int route_sockaddr(struct socket *, struct mbuf *);
122 int route_peeraddr(struct socket *, struct mbuf *);
123 void route_input(struct mbuf *m0, struct socket *, sa_family_t);
124 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
125 int route_cleargateway(struct rtentry *, void *, unsigned int);
126 void rtm_senddesync_timer(void *);
127 void rtm_senddesync(struct socket *);
128 int rtm_sendup(struct socket *, struct mbuf *);
129
130 int rtm_getifa(struct rt_addrinfo *, unsigned int);
131 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
132 uint8_t, unsigned int);
133 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
134 struct mbuf *rtm_msg1(int, struct rt_addrinfo *);
135 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
136 struct walkarg *);
137 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
138 int rtm_validate_proposal(struct rt_addrinfo *);
139 void rtm_setmetrics(u_long, const struct rt_metrics *,
140 struct rt_kmetrics *);
141 void rtm_getmetrics(const struct rtentry *,
142 struct rt_metrics *);
143
144 int sysctl_iflist(int, struct walkarg *);
145 int sysctl_ifnames(struct walkarg *);
146 int sysctl_rtable_rtstat(void *, size_t *, void *);
147
148 int rt_setsource(unsigned int, const struct sockaddr *);
149
150 /*
151 * Locks used to protect struct members
152 * I immutable after creation
153 * s solock
154 */
155 struct rtpcb {
156 struct socket *rop_socket; /* [I] */
157
158 SRPL_ENTRY(rtpcb) rop_list;
159 struct refcnt rop_refcnt;
160 struct timeout rop_timeout;
161 unsigned int rop_msgfilter; /* [s] */
162 unsigned int rop_flagfilter; /* [s] */
163 unsigned int rop_flags; /* [s] */
164 u_int rop_rtableid; /* [s] */
165 unsigned short rop_proto; /* [I] */
166 u_char rop_priority; /* [s] */
167 };
168 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb)
169
170 struct rtptable {
171 SRPL_HEAD(, rtpcb) rtp_list;
172 struct srpl_rc rtp_rc;
173 struct rwlock rtp_lk;
174 unsigned int rtp_count;
175 };
176
177 struct pool rtpcb_pool;
178 struct rtptable rtptable;
179
/*
 * rop_flags bits and the resend interval used to tell userland, by way
 * of an RTM_DESYNC message, that the route socket has overflowed and
 * messages have been lost.
 */

/* Route socket out of memory */
#define ROUTECB_FLAG_DESYNC	0x1
/* Wait until socket is empty before queueing more packets */
#define ROUTECB_FLAG_FLUSH	0x2

/* In ms */
#define ROUTE_DESYNC_RESEND_TIMEOUT	200
190
191 void
route_prinit(void)192 route_prinit(void)
193 {
194 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
195 rw_init(&rtptable.rtp_lk, "rtsock");
196 SRPL_INIT(&rtptable.rtp_list);
197 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
198 IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
199 }
200
201 void
rcb_ref(void * null,void * v)202 rcb_ref(void *null, void *v)
203 {
204 struct rtpcb *rop = v;
205
206 refcnt_take(&rop->rop_refcnt);
207 }
208
209 void
rcb_unref(void * null,void * v)210 rcb_unref(void *null, void *v)
211 {
212 struct rtpcb *rop = v;
213
214 refcnt_rele_wake(&rop->rop_refcnt);
215 }
216
217 int
route_attach(struct socket * so,int proto,int wait)218 route_attach(struct socket *so, int proto, int wait)
219 {
220 struct rtpcb *rop;
221 int error;
222
223 error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
224 if (error)
225 return (error);
226 /*
227 * use the rawcb but allocate a rtpcb, this
228 * code does not care about the additional fields
229 * and works directly on the raw socket.
230 */
231 rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
232 PR_ZERO);
233 if (rop == NULL)
234 return (ENOBUFS);
235 so->so_pcb = rop;
236 /* Init the timeout structure */
237 timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so,
238 KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
239 refcnt_init(&rop->rop_refcnt);
240
241 rop->rop_socket = so;
242 rop->rop_proto = proto;
243
244 rop->rop_rtableid = curproc->p_p->ps_rtableid;
245
246 soisconnected(so);
247 so->so_options |= SO_USELOOPBACK;
248
249 rw_enter(&rtptable.rtp_lk, RW_WRITE);
250 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
251 rop_list);
252 rtptable.rtp_count++;
253 rw_exit(&rtptable.rtp_lk);
254
255 return (0);
256 }
257
258 int
route_detach(struct socket * so)259 route_detach(struct socket *so)
260 {
261 struct rtpcb *rop;
262
263 soassertlocked(so);
264
265 rop = sotortpcb(so);
266 if (rop == NULL)
267 return (EINVAL);
268
269 rw_enter(&rtptable.rtp_lk, RW_WRITE);
270
271 rtptable.rtp_count--;
272 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
273 rop_list);
274 rw_exit(&rtptable.rtp_lk);
275
276 sounlock(so);
277
278 /* wait for all references to drop */
279 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
280 timeout_del_barrier(&rop->rop_timeout);
281
282 solock(so);
283
284 so->so_pcb = NULL;
285 KASSERT((so->so_state & SS_NOFDREF) == 0);
286 pool_put(&rtpcb_pool, rop);
287
288 return (0);
289 }
290
/*
 * Disconnect request: mark the socket disconnected.  Always returns 0.
 */
int
route_disconnect(struct socket *so)
{
	soisdisconnected(so);

	return (0);
}
297
/*
 * Shutdown request: flag the socket as unable to send any more data.
 * Always returns 0.
 */
int
route_shutdown(struct socket *so)
{
	socantsendmore(so);

	return (0);
}
304
305 void
route_rcvd(struct socket * so)306 route_rcvd(struct socket *so)
307 {
308 struct rtpcb *rop = sotortpcb(so);
309
310 soassertlocked(so);
311
312 /*
313 * If we are in a FLUSH state, check if the buffer is
314 * empty so that we can clear the flag.
315 */
316
317 mtx_enter(&so->so_rcv.sb_mtx);
318 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
319 ((sbspace_locked(so, &so->so_rcv) == so->so_rcv.sb_hiwat)))
320 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
321 mtx_leave(&so->so_rcv.sb_mtx);
322 }
323
324 int
route_send(struct socket * so,struct mbuf * m,struct mbuf * nam,struct mbuf * control)325 route_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
326 struct mbuf *control)
327 {
328 int error;
329
330 soassertlocked(so);
331
332 if (control && control->m_len) {
333 error = EOPNOTSUPP;
334 goto out;
335 }
336
337 if (nam) {
338 error = EISCONN;
339 goto out;
340 }
341
342 error = route_output(m, so);
343 m = NULL;
344
345 out:
346 m_freem(control);
347 m_freem(m);
348
349 return (error);
350 }
351
352 int
route_sockaddr(struct socket * so,struct mbuf * nam)353 route_sockaddr(struct socket *so, struct mbuf *nam)
354 {
355 return (EINVAL);
356 }
357
358 int
route_peeraddr(struct socket * so,struct mbuf * nam)359 route_peeraddr(struct socket *so, struct mbuf *nam)
360 {
361 /* minimal support, just implement a fake peer address */
362 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
363 nam->m_len = route_src.sa_len;
364 return (0);
365 }
366
367 int
route_ctloutput(int op,struct socket * so,int level,int optname,struct mbuf * m)368 route_ctloutput(int op, struct socket *so, int level, int optname,
369 struct mbuf *m)
370 {
371 struct rtpcb *rop = sotortpcb(so);
372 int error = 0;
373 unsigned int tid, prio;
374
375 if (level != AF_ROUTE)
376 return (EINVAL);
377
378 switch (op) {
379 case PRCO_SETOPT:
380 switch (optname) {
381 case ROUTE_MSGFILTER:
382 if (m == NULL || m->m_len != sizeof(unsigned int))
383 error = EINVAL;
384 else
385 rop->rop_msgfilter = *mtod(m, unsigned int *);
386 break;
387 case ROUTE_TABLEFILTER:
388 if (m == NULL || m->m_len != sizeof(unsigned int)) {
389 error = EINVAL;
390 break;
391 }
392 tid = *mtod(m, unsigned int *);
393 if (tid != RTABLE_ANY && !rtable_exists(tid))
394 error = ENOENT;
395 else
396 rop->rop_rtableid = tid;
397 break;
398 case ROUTE_PRIOFILTER:
399 if (m == NULL || m->m_len != sizeof(unsigned int)) {
400 error = EINVAL;
401 break;
402 }
403 prio = *mtod(m, unsigned int *);
404 if (prio > RTP_MAX)
405 error = EINVAL;
406 else
407 rop->rop_priority = prio;
408 break;
409 case ROUTE_FLAGFILTER:
410 if (m == NULL || m->m_len != sizeof(unsigned int))
411 error = EINVAL;
412 else
413 rop->rop_flagfilter = *mtod(m, unsigned int *);
414 break;
415 default:
416 error = ENOPROTOOPT;
417 break;
418 }
419 break;
420 case PRCO_GETOPT:
421 switch (optname) {
422 case ROUTE_MSGFILTER:
423 m->m_len = sizeof(unsigned int);
424 *mtod(m, unsigned int *) = rop->rop_msgfilter;
425 break;
426 case ROUTE_TABLEFILTER:
427 m->m_len = sizeof(unsigned int);
428 *mtod(m, unsigned int *) = rop->rop_rtableid;
429 break;
430 case ROUTE_PRIOFILTER:
431 m->m_len = sizeof(unsigned int);
432 *mtod(m, unsigned int *) = rop->rop_priority;
433 break;
434 case ROUTE_FLAGFILTER:
435 m->m_len = sizeof(unsigned int);
436 *mtod(m, unsigned int *) = rop->rop_flagfilter;
437 break;
438 default:
439 error = ENOPROTOOPT;
440 break;
441 }
442 }
443 return (error);
444 }
445
/*
 * Timeout handler armed by rtm_senddesync(): retry delivery of the
 * RTM_DESYNC message with the socket locked.  `xso' is the socket that
 * was registered with the timeout in route_attach().
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket *so = (struct socket *)xso;

	solock(so);
	rtm_senddesync(so);
	sounlock(so);
}
455
456 void
rtm_senddesync(struct socket * so)457 rtm_senddesync(struct socket *so)
458 {
459 struct rtpcb *rop = sotortpcb(so);
460 struct mbuf *desync_mbuf;
461
462 soassertlocked(so);
463
464 /*
465 * Dying socket is disconnected by upper layer and there is
466 * no reason to send packet. Also we shouldn't reschedule
467 * timeout(9), otherwise timeout_del_barrier(9) can't help us.
468 */
469 if ((so->so_state & SS_ISCONNECTED) == 0 ||
470 (so->so_rcv.sb_state & SS_CANTRCVMORE))
471 return;
472
473 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
474 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
475 return;
476
477 /*
478 * If we fail to alloc memory or if sbappendaddr()
479 * fails, re-add timeout and try again.
480 */
481 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
482 if (desync_mbuf != NULL) {
483 int ret;
484
485 mtx_enter(&so->so_rcv.sb_mtx);
486 ret = sbappendaddr(so, &so->so_rcv, &route_src,
487 desync_mbuf, NULL);
488 mtx_leave(&so->so_rcv.sb_mtx);
489
490 if (ret != 0) {
491 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
492 sorwakeup(rop->rop_socket);
493 return;
494 }
495 m_freem(desync_mbuf);
496 }
497 /* Re-add timeout to try sending msg again */
498 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
499 }
500
501 void
route_input(struct mbuf * m0,struct socket * so0,sa_family_t sa_family)502 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
503 {
504 struct socket *so;
505 struct rtpcb *rop;
506 struct rt_msghdr *rtm;
507 struct mbuf *m = m0;
508 struct srp_ref sr;
509
510 /* ensure that we can access the rtm_type via mtod() */
511 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
512 m_freem(m);
513 return;
514 }
515
516 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
517 /*
518 * If route socket is bound to an address family only send
519 * messages that match the address family. Address family
520 * agnostic messages are always sent.
521 */
522 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
523 rop->rop_proto != sa_family)
524 continue;
525
526
527 so = rop->rop_socket;
528 solock(so);
529
530 /*
531 * Check to see if we don't want our own messages and
532 * if we can receive anything.
533 */
534 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
535 !(so->so_state & SS_ISCONNECTED) ||
536 (so->so_rcv.sb_state & SS_CANTRCVMORE))
537 goto next;
538
539 /* filter messages that the process does not want */
540 rtm = mtod(m, struct rt_msghdr *);
541 /* but RTM_DESYNC can't be filtered */
542 if (rtm->rtm_type != RTM_DESYNC) {
543 if (rop->rop_msgfilter != 0 &&
544 !(rop->rop_msgfilter & (1U << rtm->rtm_type)))
545 goto next;
546 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
547 goto next;
548 }
549 switch (rtm->rtm_type) {
550 case RTM_IFANNOUNCE:
551 case RTM_DESYNC:
552 /* no tableid */
553 break;
554 case RTM_RESOLVE:
555 case RTM_NEWADDR:
556 case RTM_DELADDR:
557 case RTM_IFINFO:
558 case RTM_80211INFO:
559 case RTM_BFD:
560 /* check against rdomain id */
561 if (rop->rop_rtableid != RTABLE_ANY &&
562 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
563 goto next;
564 break;
565 default:
566 if (rop->rop_priority != 0 &&
567 rop->rop_priority < rtm->rtm_priority)
568 goto next;
569 /* check against rtable id */
570 if (rop->rop_rtableid != RTABLE_ANY &&
571 rop->rop_rtableid != rtm->rtm_tableid)
572 goto next;
573 break;
574 }
575
576 /*
577 * Check to see if the flush flag is set. If so, don't queue
578 * any more messages until the flag is cleared.
579 */
580 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
581 goto next;
582
583 rtm_sendup(so, m);
584 next:
585 sounlock(so);
586 }
587 SRPL_LEAVE(&sr);
588
589 m_freem(m);
590 }
591
592 int
rtm_sendup(struct socket * so,struct mbuf * m0)593 rtm_sendup(struct socket *so, struct mbuf *m0)
594 {
595 struct rtpcb *rop = sotortpcb(so);
596 struct mbuf *m;
597 int send_desync = 0;
598
599 soassertlocked(so);
600
601 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
602 if (m == NULL)
603 return (ENOMEM);
604
605 mtx_enter(&so->so_rcv.sb_mtx);
606 if (sbspace_locked(so, &so->so_rcv) < (2 * MSIZE) ||
607 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0)
608 send_desync = 1;
609 mtx_leave(&so->so_rcv.sb_mtx);
610
611 if (send_desync) {
612 /* Flag socket as desync'ed and flush required */
613 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
614 rtm_senddesync(so);
615 m_freem(m);
616 return (ENOBUFS);
617 }
618
619 sorwakeup(so);
620 return (0);
621 }
622
623 struct rt_msghdr *
rtm_report(struct rtentry * rt,u_char type,int seq,int tableid)624 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
625 {
626 struct rt_msghdr *rtm;
627 struct rt_addrinfo info;
628 struct sockaddr_rtlabel sa_rl;
629 struct sockaddr_in6 sa_mask;
630 #ifdef BFD
631 struct sockaddr_bfd sa_bfd;
632 #endif
633 struct ifnet *ifp = NULL;
634 int len;
635
636 bzero(&info, sizeof(info));
637 info.rti_info[RTAX_DST] = rt_key(rt);
638 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
639 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
640 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
641 #ifdef BFD
642 if (rt->rt_flags & RTF_BFD) {
643 KERNEL_LOCK();
644 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
645 KERNEL_UNLOCK();
646 }
647 #endif
648 #ifdef MPLS
649 if (rt->rt_flags & RTF_MPLS) {
650 struct sockaddr_mpls sa_mpls;
651
652 bzero(&sa_mpls, sizeof(sa_mpls));
653 sa_mpls.smpls_family = AF_MPLS;
654 sa_mpls.smpls_len = sizeof(sa_mpls);
655 sa_mpls.smpls_label = ((struct rt_mpls *)
656 rt->rt_llinfo)->mpls_label;
657 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
658 info.rti_mpls = ((struct rt_mpls *)
659 rt->rt_llinfo)->mpls_operation;
660 }
661 #endif
662 ifp = if_get(rt->rt_ifidx);
663 if (ifp != NULL) {
664 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
665 info.rti_info[RTAX_IFA] = rtable_getsource(tableid,
666 info.rti_info[RTAX_DST]->sa_family);
667 if (info.rti_info[RTAX_IFA] == NULL)
668 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
669 if (ifp->if_flags & IFF_POINTOPOINT)
670 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
671 }
672 if_put(ifp);
673 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
674
675 /* build new route message */
676 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
677 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
678
679 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
680 rtm->rtm_type = type;
681 rtm->rtm_index = rt->rt_ifidx;
682 rtm->rtm_tableid = tableid;
683 rtm->rtm_priority = rt->rt_priority & RTP_MASK;
684 rtm->rtm_flags = rt->rt_flags;
685 rtm->rtm_pid = curproc->p_p->ps_pid;
686 rtm->rtm_seq = seq;
687 rtm_getmetrics(rt, &rtm->rtm_rmx);
688 rtm->rtm_addrs = info.rti_addrs;
689 #ifdef MPLS
690 rtm->rtm_mpls = info.rti_mpls;
691 #endif
692 return rtm;
693 }
694
695 int
route_output(struct mbuf * m,struct socket * so)696 route_output(struct mbuf *m, struct socket *so)
697 {
698 struct rt_msghdr *rtm = NULL;
699 struct rtentry *rt = NULL;
700 struct rt_addrinfo info;
701 struct ifnet *ifp;
702 int len, seq, useloopback, error = 0;
703 u_int tableid;
704 u_int8_t prio;
705 u_char vers, type;
706
707 if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
708 (m = m_pullup(m, sizeof(int32_t))) == NULL))
709 return (ENOBUFS);
710 if ((m->m_flags & M_PKTHDR) == 0)
711 panic("route_output");
712
713 useloopback = so->so_options & SO_USELOOPBACK;
714
715 /*
716 * The socket can't be closed concurrently because the file
717 * descriptor reference is still held.
718 */
719
720 sounlock(so);
721
722 len = m->m_pkthdr.len;
723 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) +
724 sizeof(rtm->rtm_hdrlen) ||
725 len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
726 error = EINVAL;
727 goto fail;
728 }
729 vers = mtod(m, struct rt_msghdr *)->rtm_version;
730 switch (vers) {
731 case RTM_VERSION:
732 if (len < sizeof(struct rt_msghdr)) {
733 error = EINVAL;
734 goto fail;
735 }
736 if (len > RTM_MAXSIZE) {
737 error = EMSGSIZE;
738 goto fail;
739 }
740 rtm = malloc(len, M_RTABLE, M_WAITOK);
741 m_copydata(m, 0, len, rtm);
742 break;
743 default:
744 error = EPROTONOSUPPORT;
745 goto fail;
746 }
747
748 /* Verify that the caller is sending an appropriate message early */
749 switch (rtm->rtm_type) {
750 case RTM_ADD:
751 case RTM_DELETE:
752 case RTM_GET:
753 case RTM_CHANGE:
754 case RTM_PROPOSAL:
755 case RTM_SOURCE:
756 break;
757 default:
758 error = EOPNOTSUPP;
759 goto fail;
760 }
761 /*
762 * Verify that the header length is valid.
763 * All messages from userland start with a struct rt_msghdr.
764 */
765 if (rtm->rtm_hdrlen == 0) /* old client */
766 rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
767 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
768 len < rtm->rtm_hdrlen) {
769 error = EINVAL;
770 goto fail;
771 }
772
773 rtm->rtm_pid = curproc->p_p->ps_pid;
774
775 /*
776 * Verify that the caller has the appropriate privilege; RTM_GET
777 * is the only operation the non-superuser is allowed.
778 */
779 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
780 error = EACCES;
781 goto fail;
782 }
783 tableid = rtm->rtm_tableid;
784 if (!rtable_exists(tableid)) {
785 if (rtm->rtm_type == RTM_ADD) {
786 if ((error = rtable_add(tableid)) != 0)
787 goto fail;
788 } else {
789 error = EINVAL;
790 goto fail;
791 }
792 }
793
794 /* Do not let userland play with kernel-only flags. */
795 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
796 error = EINVAL;
797 goto fail;
798 }
799
800 /* make sure that kernel-only bits are not set */
801 rtm->rtm_priority &= RTP_MASK;
802 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
803 rtm->rtm_fmask &= RTF_FMASK;
804
805 if (rtm->rtm_priority != 0) {
806 if (rtm->rtm_priority > RTP_MAX ||
807 rtm->rtm_priority == RTP_LOCAL) {
808 error = EINVAL;
809 goto fail;
810 }
811 prio = rtm->rtm_priority;
812 } else if (rtm->rtm_type != RTM_ADD)
813 prio = RTP_ANY;
814 else if (rtm->rtm_flags & RTF_STATIC)
815 prio = 0;
816 else
817 prio = RTP_DEFAULT;
818
819 bzero(&info, sizeof(info));
820 info.rti_addrs = rtm->rtm_addrs;
821 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
822 len + (caddr_t)rtm, &info)) != 0)
823 goto fail;
824
825 info.rti_flags = rtm->rtm_flags;
826
827 if (rtm->rtm_type != RTM_SOURCE &&
828 rtm->rtm_type != RTM_PROPOSAL &&
829 (info.rti_info[RTAX_DST] == NULL ||
830 info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
831 (info.rti_info[RTAX_GATEWAY] != NULL &&
832 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
833 info.rti_info[RTAX_GENMASK] != NULL)) {
834 error = EINVAL;
835 goto fail;
836 }
837 #ifdef MPLS
838 info.rti_mpls = rtm->rtm_mpls;
839 #endif
840
841 if (info.rti_info[RTAX_GATEWAY] != NULL &&
842 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
843 (info.rti_flags & RTF_CLONING) == 0) {
844 info.rti_flags |= RTF_LLINFO;
845 }
846
847 /*
848 * Validate RTM_PROPOSAL and pass it along or error out.
849 */
850 if (rtm->rtm_type == RTM_PROPOSAL) {
851 if (rtm_validate_proposal(&info) == -1) {
852 error = EINVAL;
853 goto fail;
854 }
855 /*
856 * If this is a solicitation proposal forward request to
857 * all interfaces. Most handlers will ignore it but at least
858 * umb(4) will send a response to this event.
859 */
860 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
861 NET_LOCK();
862 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
863 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
864 }
865 NET_UNLOCK();
866 }
867 } else if (rtm->rtm_type == RTM_SOURCE) {
868 if (info.rti_info[RTAX_IFA] == NULL) {
869 error = EINVAL;
870 goto fail;
871 }
872 NET_LOCK();
873 error = rt_setsource(tableid, info.rti_info[RTAX_IFA]);
874 NET_UNLOCK();
875 if (error)
876 goto fail;
877 } else {
878 error = rtm_output(rtm, &rt, &info, prio, tableid);
879 if (!error) {
880 type = rtm->rtm_type;
881 seq = rtm->rtm_seq;
882 free(rtm, M_RTABLE, len);
883 NET_LOCK_SHARED();
884 rtm = rtm_report(rt, type, seq, tableid);
885 NET_UNLOCK_SHARED();
886 len = rtm->rtm_msglen;
887 }
888 }
889
890 rtfree(rt);
891 if (error) {
892 rtm->rtm_errno = error;
893 } else {
894 rtm->rtm_flags |= RTF_DONE;
895 }
896
897 /*
898 * Check to see if we don't want our own messages.
899 */
900 if (!useloopback) {
901 if (rtptable.rtp_count == 0) {
902 /* no other listener and no loopback of messages */
903 goto fail;
904 }
905 }
906 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
907 m_freem(m);
908 m = NULL;
909 } else if (m->m_pkthdr.len > len)
910 m_adj(m, len - m->m_pkthdr.len);
911 free(rtm, M_RTABLE, len);
912 if (m)
913 route_input(m, so, info.rti_info[RTAX_DST] ?
914 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
915 solock(so);
916
917 return (error);
918 fail:
919 free(rtm, M_RTABLE, len);
920 m_freem(m);
921 solock(so);
922
923 return (error);
924 }
925
926 int
rtm_output(struct rt_msghdr * rtm,struct rtentry ** prt,struct rt_addrinfo * info,uint8_t prio,unsigned int tableid)927 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
928 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
929 {
930 struct rtentry *rt = *prt;
931 struct ifnet *ifp = NULL;
932 int plen, newgate = 0, error = 0;
933
934 switch (rtm->rtm_type) {
935 case RTM_ADD:
936 if (info->rti_info[RTAX_GATEWAY] == NULL) {
937 error = EINVAL;
938 break;
939 }
940
941 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
942 if ((error = route_arp_conflict(rt, info))) {
943 rtfree(rt);
944 rt = NULL;
945 break;
946 }
947
948 /*
949 * We cannot go through a delete/create/insert cycle for
950 * cached route because this can lead to races in the
951 * receive path. Instead we update the L2 cache.
952 */
953 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) {
954 ifp = if_get(rt->rt_ifidx);
955 if (ifp == NULL) {
956 rtfree(rt);
957 rt = NULL;
958 error = ESRCH;
959 break;
960 }
961
962 goto change;
963 }
964
965 rtfree(rt);
966 rt = NULL;
967
968 NET_LOCK();
969 if ((error = rtm_getifa(info, tableid)) != 0) {
970 NET_UNLOCK();
971 break;
972 }
973 error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
974 NET_UNLOCK();
975 if (error == 0)
976 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
977 &rt->rt_rmx);
978 break;
979 case RTM_DELETE:
980 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
981 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
982 prio);
983 if (rt == NULL) {
984 error = ESRCH;
985 break;
986 }
987
988 /*
989 * If we got multipath routes, we require users to specify
990 * a matching gateway.
991 */
992 if (ISSET(rt->rt_flags, RTF_MPATH) &&
993 info->rti_info[RTAX_GATEWAY] == NULL) {
994 error = ESRCH;
995 break;
996 }
997
998 ifp = if_get(rt->rt_ifidx);
999 if (ifp == NULL) {
1000 rtfree(rt);
1001 rt = NULL;
1002 error = ESRCH;
1003 break;
1004 }
1005
1006 /*
1007 * Invalidate the cache of automagically created and
1008 * referenced L2 entries to make sure that ``rt_gwroute''
1009 * pointer stays valid for other CPUs.
1010 */
1011 if ((ISSET(rt->rt_flags, RTF_CACHED))) {
1012 NET_LOCK();
1013 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
1014 /* Reset the MTU of the gateway route. */
1015 rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
1016 route_cleargateway, rt);
1017 NET_UNLOCK();
1018 break;
1019 }
1020
1021 /*
1022 * Make sure that local routes are only modified by the
1023 * kernel.
1024 */
1025 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1026 error = EINVAL;
1027 break;
1028 }
1029
1030 rtfree(rt);
1031 rt = NULL;
1032
1033 NET_LOCK();
1034 error = rtrequest_delete(info, prio, ifp, &rt, tableid);
1035 NET_UNLOCK();
1036 break;
1037 case RTM_CHANGE:
1038 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1039 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1040 prio);
1041 /*
1042 * If we got multipath routes, we require users to specify
1043 * a matching gateway.
1044 */
1045 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
1046 (info->rti_info[RTAX_GATEWAY] == NULL)) {
1047 rtfree(rt);
1048 rt = NULL;
1049 }
1050
1051 /*
1052 * If RTAX_GATEWAY is the argument we're trying to
1053 * change, try to find a compatible route.
1054 */
1055 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1056 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1057 info->rti_info[RTAX_NETMASK], NULL, prio);
1058 /* Ensure we don't pick a multipath one. */
1059 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1060 rtfree(rt);
1061 rt = NULL;
1062 }
1063 }
1064
1065 if (rt == NULL) {
1066 error = ESRCH;
1067 break;
1068 }
1069
1070 /*
1071 * Make sure that local routes are only modified by the
1072 * kernel.
1073 */
1074 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1075 error = EINVAL;
1076 break;
1077 }
1078
1079 ifp = if_get(rt->rt_ifidx);
1080 if (ifp == NULL) {
1081 rtfree(rt);
1082 rt = NULL;
1083 error = ESRCH;
1084 break;
1085 }
1086
1087 /*
1088 * RTM_CHANGE needs a perfect match.
1089 */
1090 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1091 info->rti_info[RTAX_NETMASK]);
1092 if (rt_plen(rt) != plen) {
1093 error = ESRCH;
1094 break;
1095 }
1096
1097 if (info->rti_info[RTAX_GATEWAY] != NULL)
1098 if (rt->rt_gateway == NULL ||
1099 bcmp(rt->rt_gateway,
1100 info->rti_info[RTAX_GATEWAY],
1101 info->rti_info[RTAX_GATEWAY]->sa_len)) {
1102 newgate = 1;
1103 }
1104 /*
1105 * Check reachable gateway before changing the route.
1106 * New gateway could require new ifaddr, ifp;
1107 * flags may also be different; ifp may be specified
1108 * by ll sockaddr when protocol address is ambiguous.
1109 */
1110 if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1111 info->rti_info[RTAX_IFA] != NULL) {
1112 struct ifaddr *ifa = NULL;
1113
1114 NET_LOCK();
1115 if ((error = rtm_getifa(info, tableid)) != 0) {
1116 NET_UNLOCK();
1117 break;
1118 }
1119 ifa = info->rti_ifa;
1120 if (rt->rt_ifa != ifa) {
1121 ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1122 ifafree(rt->rt_ifa);
1123
1124 rt->rt_ifa = ifaref(ifa);
1125 rt->rt_ifidx = ifa->ifa_ifp->if_index;
1126 /* recheck link state after ifp change */
1127 rt_if_linkstate_change(rt, ifa->ifa_ifp,
1128 tableid);
1129 }
1130 NET_UNLOCK();
1131 }
1132 change:
1133 if (info->rti_info[RTAX_GATEWAY] != NULL) {
1134 /* When updating the gateway, make sure it is valid. */
1135 if (!newgate && rt->rt_gateway->sa_family !=
1136 info->rti_info[RTAX_GATEWAY]->sa_family) {
1137 error = EINVAL;
1138 break;
1139 }
1140
1141 NET_LOCK();
1142 error = rt_setgate(rt,
1143 info->rti_info[RTAX_GATEWAY], tableid);
1144 NET_UNLOCK();
1145 if (error)
1146 break;
1147 }
1148 #ifdef MPLS
1149 if (rtm->rtm_flags & RTF_MPLS) {
1150 NET_LOCK();
1151 error = rt_mpls_set(rt,
1152 info->rti_info[RTAX_SRC], info->rti_mpls);
1153 NET_UNLOCK();
1154 if (error)
1155 break;
1156 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1157 NET_LOCK();
1158 /* if gateway changed remove MPLS information */
1159 rt_mpls_clear(rt);
1160 NET_UNLOCK();
1161 }
1162 #endif
1163
1164 #ifdef BFD
1165 if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1166 KERNEL_LOCK();
1167 error = bfdset(rt);
1168 KERNEL_UNLOCK();
1169 if (error)
1170 break;
1171 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1172 ISSET(rtm->rtm_fmask, RTF_BFD)) {
1173 KERNEL_LOCK();
1174 bfdclear(rt);
1175 KERNEL_UNLOCK();
1176 }
1177 #endif
1178
1179 NET_LOCK();
1180 /* Hack to allow some flags to be toggled */
1181 if (rtm->rtm_fmask) {
1182 /* MPLS flag it is set by rt_mpls_set() */
1183 rtm->rtm_fmask &= ~RTF_MPLS;
1184 rtm->rtm_flags &= ~RTF_MPLS;
1185 rt->rt_flags =
1186 (rt->rt_flags & ~rtm->rtm_fmask) |
1187 (rtm->rtm_flags & rtm->rtm_fmask);
1188 }
1189 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1190
1191 ifp->if_rtrequest(ifp, RTM_ADD, rt);
1192
1193 if (info->rti_info[RTAX_LABEL] != NULL) {
1194 const char *rtlabel = ((const struct sockaddr_rtlabel *)
1195 info->rti_info[RTAX_LABEL])->sr_label;
1196 rtlabel_unref(rt->rt_labelid);
1197 rt->rt_labelid = rtlabel_name2id(rtlabel);
1198 }
1199 if_group_routechange(info->rti_info[RTAX_DST],
1200 info->rti_info[RTAX_NETMASK]);
1201 rt->rt_locks &= ~(rtm->rtm_inits);
1202 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1203 NET_UNLOCK();
1204 break;
1205 case RTM_GET:
1206 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1207 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1208 prio);
1209 if (rt == NULL)
1210 error = ESRCH;
1211 break;
1212 }
1213
1214 if_put(ifp);
1215 *prt = rt;
1216 return (error);
1217 }
1218
1219 struct ifaddr *
ifa_ifwithroute(int flags,const struct sockaddr * dst,const struct sockaddr * gateway,unsigned int rtableid)1220 ifa_ifwithroute(int flags, const struct sockaddr *dst,
1221 const struct sockaddr *gateway, unsigned int rtableid)
1222 {
1223 struct ifaddr *ifa;
1224
1225 if ((flags & RTF_GATEWAY) == 0) {
1226 /*
1227 * If we are adding a route to an interface,
1228 * and the interface is a pt to pt link
1229 * we should search for the destination
1230 * as our clue to the interface. Otherwise
1231 * we can use the local address.
1232 */
1233 ifa = NULL;
1234 if (flags & RTF_HOST)
1235 ifa = ifa_ifwithdstaddr(dst, rtableid);
1236 if (ifa == NULL)
1237 ifa = ifa_ifwithaddr(gateway, rtableid);
1238 } else {
1239 /*
1240 * If we are adding a route to a remote net
1241 * or host, the gateway may still be on the
1242 * other end of a pt to pt link.
1243 */
1244 ifa = ifa_ifwithdstaddr(gateway, rtableid);
1245 }
1246 if (ifa == NULL) {
1247 if (gateway->sa_family == AF_LINK) {
1248 const struct sockaddr_dl *sdl;
1249 struct ifnet *ifp;
1250
1251 sdl = satosdl_const(gateway);
1252 ifp = if_get(sdl->sdl_index);
1253 if (ifp != NULL)
1254 ifa = ifaof_ifpforaddr(dst, ifp);
1255 if_put(ifp);
1256 } else {
1257 struct rtentry *rt;
1258
1259 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1260 if (rt != NULL)
1261 ifa = rt->rt_ifa;
1262 rtfree(rt);
1263 }
1264 }
1265 if (ifa == NULL)
1266 return (NULL);
1267 if (ifa->ifa_addr->sa_family != dst->sa_family) {
1268 struct ifaddr *oifa = ifa;
1269 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1270 if (ifa == NULL)
1271 ifa = oifa;
1272 }
1273 return (ifa);
1274 }
1275
1276 int
rtm_getifa(struct rt_addrinfo * info,unsigned int rtid)1277 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1278 {
1279 struct ifnet *ifp = NULL;
1280
1281 /*
1282 * The "returned" `ifa' is guaranteed to be alive only if
1283 * the NET_LOCK() is held.
1284 */
1285 NET_ASSERT_LOCKED();
1286
1287 /*
1288 * ifp may be specified by sockaddr_dl when protocol address
1289 * is ambiguous
1290 */
1291 if (info->rti_info[RTAX_IFP] != NULL) {
1292 const struct sockaddr_dl *sdl;
1293
1294 sdl = satosdl_const(info->rti_info[RTAX_IFP]);
1295 ifp = if_get(sdl->sdl_index);
1296 }
1297
1298 #ifdef IPSEC
1299 /*
1300 * If the destination is a PF_KEY address, we'll look
1301 * for the existence of a encap interface number or address
1302 * in the options list of the gateway. By default, we'll return
1303 * enc0.
1304 */
1305 if (info->rti_info[RTAX_DST] &&
1306 info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1307 info->rti_ifa = enc_getifa(rtid, 0);
1308 #endif
1309
1310 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1311 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1312
1313 if (info->rti_ifa == NULL) {
1314 const struct sockaddr *sa;
1315
1316 if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1317 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1318 sa = info->rti_info[RTAX_DST];
1319
1320 if (sa != NULL && ifp != NULL)
1321 info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1322 else if (info->rti_info[RTAX_DST] != NULL &&
1323 info->rti_info[RTAX_GATEWAY] != NULL)
1324 info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1325 info->rti_info[RTAX_DST],
1326 info->rti_info[RTAX_GATEWAY],
1327 rtid);
1328 else if (sa != NULL)
1329 info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1330 sa, sa, rtid);
1331 }
1332
1333 if_put(ifp);
1334
1335 if (info->rti_ifa == NULL)
1336 return (ENETUNREACH);
1337
1338 return (0);
1339 }
1340
1341 int
route_cleargateway(struct rtentry * rt,void * arg,unsigned int rtableid)1342 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1343 {
1344 struct rtentry *nhrt = arg;
1345
1346 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1347 !ISSET(rt->rt_locks, RTV_MTU))
1348 rt->rt_mtu = 0;
1349
1350 return (0);
1351 }
1352
1353 /*
1354 * Check if the user request to insert an ARP entry does not conflict
1355 * with existing ones.
1356 *
1357 * Only two entries are allowed for a given IP address: a private one
1358 * (priv) and a public one (pub).
1359 */
1360 int
route_arp_conflict(struct rtentry * rt,struct rt_addrinfo * info)1361 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1362 {
1363 int proxy = (info->rti_flags & RTF_ANNOUNCE);
1364
1365 if ((info->rti_flags & RTF_LLINFO) == 0 ||
1366 (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1367 return (0);
1368
1369 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1370 return (0);
1371
1372 /* If the entry is cached, it can be updated. */
1373 if (ISSET(rt->rt_flags, RTF_CACHED))
1374 return (0);
1375
1376 /*
1377 * Same destination, not cached and both "priv" or "pub" conflict.
1378 * If a second entry exists, it always conflict.
1379 */
1380 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1381 ISSET(rt->rt_flags, RTF_MPATH))
1382 return (EEXIST);
1383
1384 /* No conflict but an entry exist so we need to force mpath. */
1385 info->rti_flags |= RTF_MPATH;
1386 return (0);
1387 }
1388
1389 void
rtm_setmetrics(u_long which,const struct rt_metrics * in,struct rt_kmetrics * out)1390 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1391 struct rt_kmetrics *out)
1392 {
1393 int64_t expire;
1394
1395 if (which & RTV_MTU)
1396 out->rmx_mtu = in->rmx_mtu;
1397 if (which & RTV_EXPIRE) {
1398 expire = in->rmx_expire;
1399 if (expire != 0) {
1400 expire -= gettime();
1401 expire += getuptime();
1402 }
1403
1404 out->rmx_expire = expire;
1405 }
1406 }
1407
1408 void
rtm_getmetrics(const struct rtentry * rt,struct rt_metrics * out)1409 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
1410 {
1411 const struct rt_kmetrics *in = &rt->rt_rmx;
1412 int64_t expire;
1413
1414 expire = in->rmx_expire;
1415 if (expire == 0)
1416 expire = rt_timer_get_expire(rt);
1417 if (expire != 0) {
1418 expire -= getuptime();
1419 expire += gettime();
1420 }
1421
1422 bzero(out, sizeof(*out));
1423 out->rmx_locks = in->rmx_locks;
1424 out->rmx_mtu = in->rmx_mtu;
1425 out->rmx_expire = expire;
1426 out->rmx_pksent = in->rmx_pksent;
1427 }
1428
/*
 * ROUNDUP(a): the length `a' rounded up to a multiple of sizeof(long);
 * a length that is not positive still yields sizeof(long).
 * ADVANCE(x, n): move the cursor `x' past the sockaddr `n', using its
 * sa_len rounded up with ROUNDUP().  `x' is parenthesized so that any
 * lvalue expression can be passed safely.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1432
1433 int
rtm_xaddrs(caddr_t cp,caddr_t cplim,struct rt_addrinfo * rtinfo)1434 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1435 {
1436 int i;
1437
1438 /*
1439 * Parse address bits, split address storage in chunks, and
1440 * set info pointers. Use sa_len for traversing the memory
1441 * and check that we stay within in the limit.
1442 */
1443 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1444 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1445 struct sockaddr *sa;
1446
1447 if ((rtinfo->rti_addrs & (1U << i)) == 0)
1448 continue;
1449 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1450 return (EINVAL);
1451 sa = (struct sockaddr *)cp;
1452 if (cp + sa->sa_len > cplim)
1453 return (EINVAL);
1454 rtinfo->rti_info[i] = sa;
1455 ADVANCE(cp, sa);
1456 }
1457 /*
1458 * Check that the address family is suitable for the route address
1459 * type. Check that each address has a size that fits its family
1460 * and its length is within the size. Strings within addresses must
1461 * be NUL terminated.
1462 */
1463 for (i = 0; i < RTAX_MAX; i++) {
1464 const struct sockaddr *sa;
1465 size_t len, maxlen, size;
1466
1467 sa = rtinfo->rti_info[i];
1468 if (sa == NULL)
1469 continue;
1470 maxlen = size = 0;
1471 switch (i) {
1472 case RTAX_DST:
1473 case RTAX_GATEWAY:
1474 case RTAX_SRC:
1475 switch (sa->sa_family) {
1476 case AF_INET:
1477 size = sizeof(struct sockaddr_in);
1478 break;
1479 case AF_LINK:
1480 size = sizeof(struct sockaddr_dl);
1481 break;
1482 #ifdef INET6
1483 case AF_INET6:
1484 size = sizeof(struct sockaddr_in6);
1485 break;
1486 #endif
1487 #ifdef MPLS
1488 case AF_MPLS:
1489 size = sizeof(struct sockaddr_mpls);
1490 break;
1491 #endif
1492 }
1493 break;
1494 case RTAX_IFP:
1495 if (sa->sa_family != AF_LINK)
1496 return (EAFNOSUPPORT);
1497 /*
1498 * XXX Should be sizeof(struct sockaddr_dl), but
1499 * route(8) has a bug and provides less memory.
1500 * arp(8) has another bug and uses sizeof pointer.
1501 */
1502 size = 4;
1503 break;
1504 case RTAX_IFA:
1505 switch (sa->sa_family) {
1506 case AF_INET:
1507 size = sizeof(struct sockaddr_in);
1508 break;
1509 #ifdef INET6
1510 case AF_INET6:
1511 size = sizeof(struct sockaddr_in6);
1512 break;
1513 #endif
1514 default:
1515 return (EAFNOSUPPORT);
1516 }
1517 break;
1518 case RTAX_LABEL:
1519 if (sa->sa_family != AF_UNSPEC)
1520 return (EAFNOSUPPORT);
1521 maxlen = RTLABEL_LEN;
1522 size = sizeof(struct sockaddr_rtlabel);
1523 break;
1524 #ifdef BFD
1525 case RTAX_BFD:
1526 if (sa->sa_family != AF_UNSPEC)
1527 return (EAFNOSUPPORT);
1528 size = sizeof(struct sockaddr_bfd);
1529 break;
1530 #endif
1531 case RTAX_DNS:
1532 /* more validation in rtm_validate_proposal */
1533 if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1534 return (EINVAL);
1535 if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1536 sr_dns))
1537 return (EINVAL);
1538 switch (sa->sa_family) {
1539 case AF_INET:
1540 #ifdef INET6
1541 case AF_INET6:
1542 #endif
1543 break;
1544 default:
1545 return (EAFNOSUPPORT);
1546 }
1547 break;
1548 case RTAX_STATIC:
1549 switch (sa->sa_family) {
1550 case AF_INET:
1551 #ifdef INET6
1552 case AF_INET6:
1553 #endif
1554 break;
1555 default:
1556 return (EAFNOSUPPORT);
1557 }
1558 maxlen = RTSTATIC_LEN;
1559 size = sizeof(struct sockaddr_rtstatic);
1560 break;
1561 case RTAX_SEARCH:
1562 if (sa->sa_family != AF_UNSPEC)
1563 return (EAFNOSUPPORT);
1564 maxlen = RTSEARCH_LEN;
1565 size = sizeof(struct sockaddr_rtsearch);
1566 break;
1567 }
1568 if (size) {
1569 /* memory for the full struct must be provided */
1570 if (sa->sa_len < size)
1571 return (EINVAL);
1572 }
1573 if (maxlen) {
1574 /* this should not happen */
1575 if (2 + maxlen > size)
1576 return (EINVAL);
1577 /* strings must be NUL terminated within the struct */
1578 len = strnlen(sa->sa_data, maxlen);
1579 if (len >= maxlen || 2 + len >= sa->sa_len)
1580 return (EINVAL);
1581 break;
1582 }
1583 }
1584 return (0);
1585 }
1586
1587 struct mbuf *
rtm_msg1(int type,struct rt_addrinfo * rtinfo)1588 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1589 {
1590 struct rt_msghdr *rtm;
1591 struct mbuf *m;
1592 int i;
1593 const struct sockaddr *sa;
1594 int len, dlen, hlen;
1595
1596 switch (type) {
1597 case RTM_DELADDR:
1598 case RTM_NEWADDR:
1599 hlen = sizeof(struct ifa_msghdr);
1600 break;
1601 case RTM_IFINFO:
1602 hlen = sizeof(struct if_msghdr);
1603 break;
1604 case RTM_IFANNOUNCE:
1605 hlen = sizeof(struct if_announcemsghdr);
1606 break;
1607 #ifdef BFD
1608 case RTM_BFD:
1609 hlen = sizeof(struct bfd_msghdr);
1610 break;
1611 #endif
1612 case RTM_80211INFO:
1613 hlen = sizeof(struct if_ieee80211_msghdr);
1614 break;
1615 default:
1616 hlen = sizeof(struct rt_msghdr);
1617 break;
1618 }
1619 len = hlen;
1620 for (i = 0; i < RTAX_MAX; i++) {
1621 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1622 continue;
1623 len += ROUNDUP(sa->sa_len);
1624 }
1625 if (len > MCLBYTES)
1626 panic("rtm_msg1");
1627 m = m_gethdr(M_DONTWAIT, MT_DATA);
1628 if (m && len > MHLEN) {
1629 MCLGET(m, M_DONTWAIT);
1630 if ((m->m_flags & M_EXT) == 0) {
1631 m_free(m);
1632 m = NULL;
1633 }
1634 }
1635 if (m == NULL)
1636 return (m);
1637 m->m_pkthdr.len = m->m_len = len;
1638 m->m_pkthdr.ph_ifidx = 0;
1639 rtm = mtod(m, struct rt_msghdr *);
1640 bzero(rtm, len);
1641 len = hlen;
1642 for (i = 0; i < RTAX_MAX; i++) {
1643 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1644 continue;
1645 rtinfo->rti_addrs |= (1U << i);
1646 dlen = ROUNDUP(sa->sa_len);
1647 if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) {
1648 m_freem(m);
1649 return (NULL);
1650 }
1651 len += dlen;
1652 }
1653 rtm->rtm_msglen = len;
1654 rtm->rtm_hdrlen = hlen;
1655 rtm->rtm_version = RTM_VERSION;
1656 rtm->rtm_type = type;
1657 return (m);
1658 }
1659
1660 int
rtm_msg2(int type,int vers,struct rt_addrinfo * rtinfo,caddr_t cp,struct walkarg * w)1661 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1662 struct walkarg *w)
1663 {
1664 int i;
1665 int len, dlen, hlen, second_time = 0;
1666 caddr_t cp0;
1667
1668 rtinfo->rti_addrs = 0;
1669 again:
1670 switch (type) {
1671 case RTM_DELADDR:
1672 case RTM_NEWADDR:
1673 len = sizeof(struct ifa_msghdr);
1674 break;
1675 case RTM_IFINFO:
1676 len = sizeof(struct if_msghdr);
1677 break;
1678 default:
1679 len = sizeof(struct rt_msghdr);
1680 break;
1681 }
1682 hlen = len;
1683 if ((cp0 = cp) != NULL)
1684 cp += len;
1685 for (i = 0; i < RTAX_MAX; i++) {
1686 const struct sockaddr *sa;
1687
1688 if ((sa = rtinfo->rti_info[i]) == NULL)
1689 continue;
1690 rtinfo->rti_addrs |= (1U << i);
1691 dlen = ROUNDUP(sa->sa_len);
1692 if (cp) {
1693 bcopy(sa, cp, sa->sa_len);
1694 bzero(cp + sa->sa_len, dlen - sa->sa_len);
1695 cp += dlen;
1696 }
1697 len += dlen;
1698 }
1699 /* align message length to the next natural boundary */
1700 len = ALIGN(len);
1701 if (cp == 0 && w != NULL && !second_time) {
1702 w->w_needed += len;
1703 if (w->w_needed <= w->w_given && w->w_where) {
1704 if (w->w_tmemsize < len) {
1705 free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1706 w->w_tmem = malloc(len, M_RTABLE,
1707 M_NOWAIT | M_ZERO);
1708 if (w->w_tmem)
1709 w->w_tmemsize = len;
1710 }
1711 if (w->w_tmem) {
1712 cp = w->w_tmem;
1713 second_time = 1;
1714 goto again;
1715 } else
1716 w->w_where = 0;
1717 }
1718 }
1719 if (cp && w) /* clear the message header */
1720 bzero(cp0, hlen);
1721
1722 if (cp) {
1723 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1724
1725 rtm->rtm_version = RTM_VERSION;
1726 rtm->rtm_type = type;
1727 rtm->rtm_msglen = len;
1728 rtm->rtm_hdrlen = hlen;
1729 }
1730 return (len);
1731 }
1732
1733 void
rtm_send(struct rtentry * rt,int cmd,int error,unsigned int rtableid)1734 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1735 {
1736 struct rt_addrinfo info;
1737 struct ifnet *ifp;
1738 struct sockaddr_rtlabel sa_rl;
1739 struct sockaddr_in6 sa_mask;
1740
1741 memset(&info, 0, sizeof(info));
1742 info.rti_info[RTAX_DST] = rt_key(rt);
1743 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1744 if (!ISSET(rt->rt_flags, RTF_HOST))
1745 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1746 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1747 ifp = if_get(rt->rt_ifidx);
1748 if (ifp != NULL) {
1749 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1750 info.rti_info[RTAX_IFA] = rtable_getsource(rtableid,
1751 info.rti_info[RTAX_DST]->sa_family);
1752 if (info.rti_info[RTAX_IFA] == NULL)
1753 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1754 }
1755
1756 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1757 rtableid);
1758 if_put(ifp);
1759 }
1760
1761 /*
1762 * This routine is called to generate a message from the routing
1763 * socket indicating that a redirect has occurred, a routing lookup
1764 * has failed, or that a protocol has detected timeouts to a particular
1765 * destination.
1766 */
1767 void
rtm_miss(int type,struct rt_addrinfo * rtinfo,int flags,uint8_t prio,u_int ifidx,int error,u_int tableid)1768 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1769 u_int ifidx, int error, u_int tableid)
1770 {
1771 struct rt_msghdr *rtm;
1772 struct mbuf *m;
1773 const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1774
1775 if (rtptable.rtp_count == 0)
1776 return;
1777 m = rtm_msg1(type, rtinfo);
1778 if (m == NULL)
1779 return;
1780 rtm = mtod(m, struct rt_msghdr *);
1781 rtm->rtm_flags = RTF_DONE | flags;
1782 rtm->rtm_priority = prio;
1783 rtm->rtm_errno = error;
1784 rtm->rtm_tableid = tableid;
1785 rtm->rtm_addrs = rtinfo->rti_addrs;
1786 rtm->rtm_index = ifidx;
1787 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1788 }
1789
1790 /*
1791 * This routine is called to generate a message from the routing
1792 * socket indicating that the status of a network interface has changed.
1793 */
1794 void
rtm_ifchg(struct ifnet * ifp)1795 rtm_ifchg(struct ifnet *ifp)
1796 {
1797 struct rt_addrinfo info;
1798 struct if_msghdr *ifm;
1799 struct mbuf *m;
1800
1801 if (rtptable.rtp_count == 0)
1802 return;
1803 memset(&info, 0, sizeof(info));
1804 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1805 m = rtm_msg1(RTM_IFINFO, &info);
1806 if (m == NULL)
1807 return;
1808 ifm = mtod(m, struct if_msghdr *);
1809 ifm->ifm_index = ifp->if_index;
1810 ifm->ifm_tableid = ifp->if_rdomain;
1811 ifm->ifm_flags = ifp->if_flags;
1812 ifm->ifm_xflags = ifp->if_xflags;
1813 if_getdata(ifp, &ifm->ifm_data);
1814 ifm->ifm_addrs = info.rti_addrs;
1815 route_input(m, NULL, AF_UNSPEC);
1816 }
1817
1818 /*
1819 * This is called to generate messages from the routing socket
1820 * indicating a network interface has had addresses associated with it.
1821 * if we ever reverse the logic and replace messages TO the routing
1822 * socket indicate a request to configure interfaces, then it will
1823 * be unnecessary as the routing socket will automatically generate
1824 * copies of it.
1825 */
1826 void
rtm_addr(int cmd,struct ifaddr * ifa)1827 rtm_addr(int cmd, struct ifaddr *ifa)
1828 {
1829 struct ifnet *ifp = ifa->ifa_ifp;
1830 struct mbuf *m;
1831 struct rt_addrinfo info;
1832 struct ifa_msghdr *ifam;
1833
1834 if (rtptable.rtp_count == 0)
1835 return;
1836
1837 memset(&info, 0, sizeof(info));
1838 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1839 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1840 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1841 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1842 if ((m = rtm_msg1(cmd, &info)) == NULL)
1843 return;
1844 ifam = mtod(m, struct ifa_msghdr *);
1845 ifam->ifam_index = ifp->if_index;
1846 ifam->ifam_metric = ifa->ifa_metric;
1847 ifam->ifam_flags = ifa->ifa_flags;
1848 ifam->ifam_addrs = info.rti_addrs;
1849 ifam->ifam_tableid = ifp->if_rdomain;
1850
1851 route_input(m, NULL,
1852 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1853 }
1854
1855 /*
1856 * This is called to generate routing socket messages indicating
1857 * network interface arrival and departure.
1858 */
1859 void
rtm_ifannounce(struct ifnet * ifp,int what)1860 rtm_ifannounce(struct ifnet *ifp, int what)
1861 {
1862 struct if_announcemsghdr *ifan;
1863 struct mbuf *m;
1864
1865 if (rtptable.rtp_count == 0)
1866 return;
1867 m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1868 if (m == NULL)
1869 return;
1870 ifan = mtod(m, struct if_announcemsghdr *);
1871 ifan->ifan_index = ifp->if_index;
1872 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1873 ifan->ifan_what = what;
1874 route_input(m, NULL, AF_UNSPEC);
1875 }
1876
#ifdef BFD
/*
 * Generate an RTM_BFD routing socket message indicating the state of
 * a BFD session.  The message carries the destination and interface
 * address of the session's route plus the sockaddr_bfd produced by
 * bfd2sa().  Asserts that the kernel lock is held once the message has
 * been allocated.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rtentry *rt;
	struct rt_addrinfo info;
	struct sockaddr_bfd state;
	struct bfd_msghdr *hdr;
	struct mbuf *m;

	if (rtptable.rtp_count == 0)
		return;

	rt = bfd->bc_rt;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	KERNEL_ASSERT_LOCKED();
	bfd2sa(rt, &state);
	memcpy(&hdr->bm_sa, &state, sizeof(state));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1909
1910 /*
1911 * This is used to generate routing socket messages indicating
1912 * the state of an ieee80211 interface.
1913 */
1914 void
rtm_80211info(struct ifnet * ifp,struct if_ieee80211_data * ifie)1915 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1916 {
1917 struct if_ieee80211_msghdr *ifim;
1918 struct mbuf *m;
1919
1920 if (rtptable.rtp_count == 0)
1921 return;
1922 m = rtm_msg1(RTM_80211INFO, NULL);
1923 if (m == NULL)
1924 return;
1925 ifim = mtod(m, struct if_ieee80211_msghdr *);
1926 ifim->ifim_index = ifp->if_index;
1927 ifim->ifim_tableid = ifp->if_rdomain;
1928
1929 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1930 route_input(m, NULL, AF_UNSPEC);
1931 }
1932
1933 /*
1934 * This is used to generate routing socket messages indicating
1935 * the address selection proposal from an interface.
1936 */
1937 void
rtm_proposal(struct ifnet * ifp,struct rt_addrinfo * rtinfo,int flags,uint8_t prio)1938 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1939 uint8_t prio)
1940 {
1941 struct rt_msghdr *rtm;
1942 struct mbuf *m;
1943
1944 m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1945 if (m == NULL)
1946 return;
1947 rtm = mtod(m, struct rt_msghdr *);
1948 rtm->rtm_flags = RTF_DONE | flags;
1949 rtm->rtm_priority = prio;
1950 rtm->rtm_tableid = ifp->if_rdomain;
1951 rtm->rtm_index = ifp->if_index;
1952 rtm->rtm_addrs = rtinfo->rti_addrs;
1953
1954 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1955 }
1956
1957 /*
1958 * This is used in dumping the kernel table via sysctl().
1959 */
1960 int
sysctl_dumpentry(struct rtentry * rt,void * v,unsigned int id)1961 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1962 {
1963 struct walkarg *w = v;
1964 int error = 0, size;
1965 struct rt_addrinfo info;
1966 struct ifnet *ifp;
1967 #ifdef BFD
1968 struct sockaddr_bfd sa_bfd;
1969 #endif
1970 struct sockaddr_rtlabel sa_rl;
1971 struct sockaddr_in6 sa_mask;
1972
1973 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1974 return 0;
1975 if (w->w_op == NET_RT_DUMP && w->w_arg) {
1976 u_int8_t prio = w->w_arg & RTP_MASK;
1977 if (w->w_arg < 0) {
1978 prio = (-w->w_arg) & RTP_MASK;
1979 /* Show all routes that are not this priority */
1980 if (prio == (rt->rt_priority & RTP_MASK))
1981 return 0;
1982 } else {
1983 if (prio != (rt->rt_priority & RTP_MASK) &&
1984 prio != RTP_ANY)
1985 return 0;
1986 }
1987 }
1988 bzero(&info, sizeof(info));
1989 info.rti_info[RTAX_DST] = rt_key(rt);
1990 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1991 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1992 ifp = if_get(rt->rt_ifidx);
1993 if (ifp != NULL) {
1994 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1995 info.rti_info[RTAX_IFA] =
1996 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1997 if (info.rti_info[RTAX_IFA] == NULL)
1998 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1999 if (ifp->if_flags & IFF_POINTOPOINT)
2000 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
2001 }
2002 if_put(ifp);
2003 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
2004 #ifdef BFD
2005 if (rt->rt_flags & RTF_BFD) {
2006 KERNEL_ASSERT_LOCKED();
2007 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
2008 }
2009 #endif
2010 #ifdef MPLS
2011 if (rt->rt_flags & RTF_MPLS) {
2012 struct sockaddr_mpls sa_mpls;
2013
2014 bzero(&sa_mpls, sizeof(sa_mpls));
2015 sa_mpls.smpls_family = AF_MPLS;
2016 sa_mpls.smpls_len = sizeof(sa_mpls);
2017 sa_mpls.smpls_label = ((struct rt_mpls *)
2018 rt->rt_llinfo)->mpls_label;
2019 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
2020 info.rti_mpls = ((struct rt_mpls *)
2021 rt->rt_llinfo)->mpls_operation;
2022 }
2023 #endif
2024
2025 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
2026 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
2027 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
2028
2029 rtm->rtm_pid = curproc->p_p->ps_pid;
2030 rtm->rtm_flags = RTF_DONE | rt->rt_flags;
2031 rtm->rtm_priority = rt->rt_priority & RTP_MASK;
2032 rtm_getmetrics(rt, &rtm->rtm_rmx);
2033 /* Do not account the routing table's reference. */
2034 rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1;
2035 rtm->rtm_index = rt->rt_ifidx;
2036 rtm->rtm_addrs = info.rti_addrs;
2037 rtm->rtm_tableid = id;
2038 #ifdef MPLS
2039 rtm->rtm_mpls = info.rti_mpls;
2040 #endif
2041 if ((error = copyout(rtm, w->w_where, size)) != 0)
2042 w->w_where = NULL;
2043 else
2044 w->w_where += size;
2045 }
2046 return (error);
2047 }
2048
2049 int
sysctl_iflist(int af,struct walkarg * w)2050 sysctl_iflist(int af, struct walkarg *w)
2051 {
2052 struct ifnet *ifp;
2053 struct ifaddr *ifa;
2054 struct rt_addrinfo info;
2055 int len, error = 0;
2056
2057 bzero(&info, sizeof(info));
2058 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2059 if (w->w_arg && w->w_arg != ifp->if_index)
2060 continue;
2061 /* Copy the link-layer address first */
2062 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
2063 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
2064 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
2065 struct if_msghdr *ifm;
2066
2067 ifm = (struct if_msghdr *)w->w_tmem;
2068 ifm->ifm_index = ifp->if_index;
2069 ifm->ifm_tableid = ifp->if_rdomain;
2070 ifm->ifm_flags = ifp->if_flags;
2071 if_getdata(ifp, &ifm->ifm_data);
2072 ifm->ifm_addrs = info.rti_addrs;
2073 error = copyout(ifm, w->w_where, len);
2074 if (error)
2075 return (error);
2076 w->w_where += len;
2077 }
2078 info.rti_info[RTAX_IFP] = NULL;
2079 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2080 KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
2081 if (af && af != ifa->ifa_addr->sa_family)
2082 continue;
2083 info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2084 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2085 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2086 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
2087 if (w->w_where && w->w_tmem &&
2088 w->w_needed <= w->w_given) {
2089 struct ifa_msghdr *ifam;
2090
2091 ifam = (struct ifa_msghdr *)w->w_tmem;
2092 ifam->ifam_index = ifa->ifa_ifp->if_index;
2093 ifam->ifam_flags = ifa->ifa_flags;
2094 ifam->ifam_metric = ifa->ifa_metric;
2095 ifam->ifam_addrs = info.rti_addrs;
2096 error = copyout(w->w_tmem, w->w_where, len);
2097 if (error)
2098 return (error);
2099 w->w_where += len;
2100 }
2101 }
2102 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2103 info.rti_info[RTAX_BRD] = NULL;
2104 }
2105 return (0);
2106 }
2107
2108 int
sysctl_ifnames(struct walkarg * w)2109 sysctl_ifnames(struct walkarg *w)
2110 {
2111 struct if_nameindex_msg ifn;
2112 struct ifnet *ifp;
2113 int error = 0;
2114
2115 /* XXX ignore tableid for now */
2116 TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2117 if (w->w_arg && w->w_arg != ifp->if_index)
2118 continue;
2119 w->w_needed += sizeof(ifn);
2120 if (w->w_where && w->w_needed <= w->w_given) {
2121
2122 memset(&ifn, 0, sizeof(ifn));
2123 ifn.if_index = ifp->if_index;
2124 strlcpy(ifn.if_name, ifp->if_xname,
2125 sizeof(ifn.if_name));
2126 error = copyout(&ifn, w->w_where, sizeof(ifn));
2127 if (error)
2128 return (error);
2129 w->w_where += sizeof(ifn);
2130 }
2131 }
2132
2133 return (0);
2134 }
2135
2136 int
sysctl_source(int af,u_int tableid,struct walkarg * w)2137 sysctl_source(int af, u_int tableid, struct walkarg *w)
2138 {
2139 struct sockaddr *sa;
2140 int size, error = 0;
2141
2142 sa = rtable_getsource(tableid, af);
2143 if (sa) {
2144 switch (sa->sa_family) {
2145 case AF_INET:
2146 size = sizeof(struct sockaddr_in);
2147 break;
2148 #ifdef INET6
2149 case AF_INET6:
2150 size = sizeof(struct sockaddr_in6);
2151 break;
2152 #endif
2153 default:
2154 return (0);
2155 }
2156 w->w_needed += size;
2157 if (w->w_where && w->w_needed <= w->w_given) {
2158 if ((error = copyout(sa, w->w_where, size)))
2159 return (error);
2160 w->w_where += size;
2161 }
2162 }
2163 return (0);
2164 }
2165
2166 int
sysctl_rtable(int * name,u_int namelen,void * where,size_t * given,void * new,size_t newlen)2167 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2168 size_t newlen)
2169 {
2170 int i, error = EINVAL;
2171 u_char af;
2172 struct walkarg w;
2173 struct rt_tableinfo tableinfo;
2174 u_int tableid = 0;
2175
2176 if (new)
2177 return (EPERM);
2178 if (namelen < 3 || namelen > 4)
2179 return (EINVAL);
2180 af = name[0];
2181 bzero(&w, sizeof(w));
2182 w.w_where = where;
2183 w.w_given = *given;
2184 w.w_op = name[1];
2185 w.w_arg = name[2];
2186
2187 if (namelen == 4) {
2188 tableid = name[3];
2189 if (!rtable_exists(tableid))
2190 return (ENOENT);
2191 } else
2192 tableid = curproc->p_p->ps_rtableid;
2193
2194 switch (w.w_op) {
2195 case NET_RT_DUMP:
2196 case NET_RT_FLAGS:
2197 NET_LOCK_SHARED();
2198 for (i = 1; i <= AF_MAX; i++) {
2199 if (af != 0 && af != i)
2200 continue;
2201
2202 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2203 &w);
2204 if (error == EAFNOSUPPORT)
2205 error = 0;
2206 if (error)
2207 break;
2208 }
2209 NET_UNLOCK_SHARED();
2210 break;
2211
2212 case NET_RT_IFLIST:
2213 NET_LOCK_SHARED();
2214 error = sysctl_iflist(af, &w);
2215 NET_UNLOCK_SHARED();
2216 break;
2217
2218 case NET_RT_STATS:
2219 return (sysctl_rtable_rtstat(where, given, new));
2220 case NET_RT_TABLE:
2221 tableid = w.w_arg;
2222 if (!rtable_exists(tableid))
2223 return (ENOENT);
2224 memset(&tableinfo, 0, sizeof tableinfo);
2225 tableinfo.rti_tableid = tableid;
2226 tableinfo.rti_domainid = rtable_l2(tableid);
2227 error = sysctl_rdstruct(where, given, new,
2228 &tableinfo, sizeof(tableinfo));
2229 return (error);
2230 case NET_RT_IFNAMES:
2231 NET_LOCK_SHARED();
2232 error = sysctl_ifnames(&w);
2233 NET_UNLOCK_SHARED();
2234 break;
2235 case NET_RT_SOURCE:
2236 tableid = w.w_arg;
2237 if (!rtable_exists(tableid))
2238 return (ENOENT);
2239 NET_LOCK_SHARED();
2240 for (i = 1; i <= AF_MAX; i++) {
2241 if (af != 0 && af != i)
2242 continue;
2243
2244 error = sysctl_source(i, tableid, &w);
2245 if (error == EAFNOSUPPORT)
2246 error = 0;
2247 if (error)
2248 break;
2249 }
2250 NET_UNLOCK_SHARED();
2251 break;
2252 }
2253 free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2254 if (where) {
2255 *given = w.w_where - (caddr_t)where;
2256 if (w.w_needed > w.w_given)
2257 return (ENOMEM);
2258 } else if (w.w_needed == 0) {
2259 *given = 0;
2260 } else {
2261 *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024),
2262 PAGE_SIZE);
2263 }
2264 return (error);
2265 }
2266
2267 int
sysctl_rtable_rtstat(void * oldp,size_t * oldlenp,void * newp)2268 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2269 {
2270 extern struct cpumem *rtcounters;
2271 uint64_t counters[rts_ncounters];
2272 struct rtstat rtstat;
2273 uint32_t *words = (uint32_t *)&rtstat;
2274 int i;
2275
2276 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2277 memset(&rtstat, 0, sizeof rtstat);
2278 counters_read(rtcounters, counters, nitems(counters), NULL);
2279
2280 for (i = 0; i < nitems(counters); i++)
2281 words[i] = (uint32_t)counters[i];
2282
2283 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2284 }
2285
2286 int
rtm_validate_proposal(struct rt_addrinfo * info)2287 rtm_validate_proposal(struct rt_addrinfo *info)
2288 {
2289 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2290 RTA_SEARCH)) {
2291 return -1;
2292 }
2293
2294 if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2295 const struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2296 if (sa == NULL)
2297 return -1;
2298 switch (sa->sa_family) {
2299 case AF_INET:
2300 if (sa->sa_len != sizeof(struct sockaddr_in))
2301 return -1;
2302 break;
2303 case AF_INET6:
2304 if (sa->sa_len != sizeof(struct sockaddr_in6))
2305 return -1;
2306 break;
2307 default:
2308 return -1;
2309 }
2310 }
2311
2312 if (ISSET(info->rti_addrs, RTA_IFA)) {
2313 const struct sockaddr *sa = info->rti_info[RTAX_IFA];
2314 if (sa == NULL)
2315 return -1;
2316 switch (sa->sa_family) {
2317 case AF_INET:
2318 if (sa->sa_len != sizeof(struct sockaddr_in))
2319 return -1;
2320 break;
2321 case AF_INET6:
2322 if (sa->sa_len != sizeof(struct sockaddr_in6))
2323 return -1;
2324 break;
2325 default:
2326 return -1;
2327 }
2328 }
2329
2330 if (ISSET(info->rti_addrs, RTA_DNS)) {
2331 const struct sockaddr_rtdns *rtdns =
2332 (const struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2333 if (rtdns == NULL)
2334 return -1;
2335 if (rtdns->sr_len > sizeof(*rtdns))
2336 return -1;
2337 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2338 return -1;
2339 switch (rtdns->sr_family) {
2340 case AF_INET:
2341 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2342 sr_dns)) % sizeof(struct in_addr) != 0)
2343 return -1;
2344 break;
2345 #ifdef INET6
2346 case AF_INET6:
2347 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2348 sr_dns)) % sizeof(struct in6_addr) != 0)
2349 return -1;
2350 break;
2351 #endif
2352 default:
2353 return -1;
2354 }
2355 }
2356
2357 if (ISSET(info->rti_addrs, RTA_STATIC)) {
2358 const struct sockaddr_rtstatic *rtstatic = (const struct
2359 sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2360 if (rtstatic == NULL)
2361 return -1;
2362 if (rtstatic->sr_len > sizeof(*rtstatic))
2363 return -1;
2364 if (rtstatic->sr_len <=
2365 offsetof(struct sockaddr_rtstatic, sr_static))
2366 return -1;
2367 }
2368
2369 if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2370 const struct sockaddr_rtsearch *rtsearch = (const struct
2371 sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2372 if (rtsearch == NULL)
2373 return -1;
2374 if (rtsearch->sr_len > sizeof(*rtsearch))
2375 return -1;
2376 if (rtsearch->sr_len <=
2377 offsetof(struct sockaddr_rtsearch, sr_search))
2378 return -1;
2379 }
2380
2381 return 0;
2382 }
2383
2384 int
rt_setsource(unsigned int rtableid,const struct sockaddr * src)2385 rt_setsource(unsigned int rtableid, const struct sockaddr *src)
2386 {
2387 struct ifaddr *ifa;
2388 /*
2389 * If source address is 0.0.0.0 or ::
2390 * use automatic source selection
2391 */
2392 switch(src->sa_family) {
2393 case AF_INET:
2394 if(satosin_const(src)->sin_addr.s_addr == INADDR_ANY) {
2395 rtable_setsource(rtableid, AF_INET, NULL);
2396 return (0);
2397 }
2398 break;
2399 #ifdef INET6
2400 case AF_INET6:
2401 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6_const(src)->sin6_addr)) {
2402 rtable_setsource(rtableid, AF_INET6, NULL);
2403 return (0);
2404 }
2405 break;
2406 #endif
2407 default:
2408 return (EAFNOSUPPORT);
2409 }
2410
2411 /*
2412 * Check if source address is assigned to an interface in the
2413 * same rdomain
2414 */
2415 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL)
2416 return (EINVAL);
2417
2418 return rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr);
2419 }
2420
2421 /*
2422 * Definitions of protocols supported in the ROUTE domain.
2423 */
2424
2425 const struct pr_usrreqs route_usrreqs = {
2426 .pru_attach = route_attach,
2427 .pru_detach = route_detach,
2428 .pru_disconnect = route_disconnect,
2429 .pru_shutdown = route_shutdown,
2430 .pru_rcvd = route_rcvd,
2431 .pru_send = route_send,
2432 .pru_sockaddr = route_sockaddr,
2433 .pru_peeraddr = route_peeraddr,
2434 };
2435
2436 const struct protosw routesw[] = {
2437 {
2438 .pr_type = SOCK_RAW,
2439 .pr_domain = &routedomain,
2440 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2441 .pr_ctloutput = route_ctloutput,
2442 .pr_usrreqs = &route_usrreqs,
2443 .pr_init = route_prinit,
2444 .pr_sysctl = sysctl_rtable
2445 }
2446 };
2447
2448 const struct domain routedomain = {
2449 .dom_family = PF_ROUTE,
2450 .dom_name = "route",
2451 .dom_init = route_init,
2452 .dom_protosw = routesw,
2453 .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2454 };
2455