1 /* $NetBSD: in6_pcb.c,v 1.177 2022/11/04 09:04:27 ozaki-r Exp $ */
2 /* $KAME: in6_pcb.c,v 1.84 2001/02/08 18:02:08 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: in6_pcb.c,v 1.177 2022/11/04 09:04:27 ozaki-r Exp $");
66
67 #ifdef _KERNEL_OPT
68 #include "opt_inet.h"
69 #include "opt_ipsec.h"
70 #endif
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/mbuf.h>
75 #include <sys/protosw.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
78 #include <sys/ioctl.h>
79 #include <sys/errno.h>
80 #include <sys/time.h>
81 #include <sys/proc.h>
82 #include <sys/kauth.h>
83 #include <sys/domain.h>
84 #include <sys/once.h>
85
86 #include <net/if.h>
87 #include <net/route.h>
88
89 #include <netinet/in.h>
90 #include <netinet/in_var.h>
91 #include <netinet/in_systm.h>
92 #include <netinet/ip.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet/ip6.h>
95 #include <netinet/portalgo.h>
96 #include <netinet6/ip6_var.h>
97 #include <netinet6/in6_pcb.h>
98 #include <netinet6/scope6_var.h>
99
100 #include "faith.h"
101
102 #ifdef IPSEC
103 #include <netipsec/ipsec.h>
104 #include <netipsec/ipsec6.h>
105 #include <netipsec/key.h>
106 #endif /* IPSEC */
107
108 #include <netinet/tcp_vtw.h>
109
110 const struct in6_addr zeroin6_addr;
111
/*
 * Hash helpers for the PCB table.  Each IN6PCBHASH_* lookup macro evaluates
 * to the address of one bucket head.  Port arguments are passed through
 * ntohs() before being mixed into the hash.
 */

/* XOR-fold the four 32-bit words of an IPv6 address into a single word. */
#define	IN6PCBHASH_FOLD(addr)						\
	((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^			\
	 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3])

/* Bucket of the port hash, selected by the local port alone. */
#define	IN6PCBHASH_PORT(table, lport)					\
	&(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash]

/* Bucket of the bind hash, selected by local address and local port. */
#define	IN6PCBHASH_BIND(table, laddr, lport)				\
	&(table)->inpt_bindhashtbl[					\
	    (IN6PCBHASH_FOLD(laddr) + ntohs(lport)) &			\
	    (table)->inpt_bindhash]

/*
 * Bucket selected by the foreign/local address and port 4-tuple.
 * NOTE(review): this indexes inpt_bindhashtbl with the inpt_bindhash mask
 * rather than a separate connect table -- confirm that this is intended.
 */
#define	IN6PCBHASH_CONNECT(table, faddr, fport, laddr, lport)		\
	&(table)->inpt_bindhashtbl[					\
	    ((IN6PCBHASH_FOLD(faddr) + ntohs(fport)) +			\
	     (IN6PCBHASH_FOLD(laddr) + ntohs(lport))) &			\
	    (table)->inpt_bindhash]
126
127 int ip6_anonportmin = IPV6PORT_ANONMIN;
128 int ip6_anonportmax = IPV6PORT_ANONMAX;
129 int ip6_lowportmin = IPV6PORT_RESERVEDMIN;
130 int ip6_lowportmax = IPV6PORT_RESERVEDMAX;
131
132 void
in6pcb_init(struct inpcbtable * table,int bindhashsize,int connecthashsize)133 in6pcb_init(struct inpcbtable *table, int bindhashsize, int connecthashsize)
134 {
135
136 inpcb_init(table, bindhashsize, connecthashsize);
137 table->inpt_lastport = (in_port_t)ip6_anonportmax;
138 }
139
140 /*
141 * Bind address from sin6 to inp.
142 */
143 static int
in6pcb_bind_addr(struct inpcb * inp,struct sockaddr_in6 * sin6,struct lwp * l)144 in6pcb_bind_addr(struct inpcb *inp, struct sockaddr_in6 *sin6, struct lwp *l)
145 {
146 int error;
147 int s;
148
149 /*
150 * We should check the family, but old programs
151 * incorrectly fail to initialize it.
152 */
153 if (sin6->sin6_family != AF_INET6)
154 return EAFNOSUPPORT;
155
156 #ifndef INET
157 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
158 return EADDRNOTAVAIL;
159 #endif
160
161 if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
162 return error;
163
164 s = pserialize_read_enter();
165 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
166 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
167 error = EINVAL;
168 goto out;
169 }
170 if (sin6->sin6_addr.s6_addr32[3]) {
171 struct sockaddr_in sin;
172
173 memset(&sin, 0, sizeof(sin));
174 sin.sin_len = sizeof(sin);
175 sin.sin_family = AF_INET;
176 bcopy(&sin6->sin6_addr.s6_addr32[3],
177 &sin.sin_addr, sizeof(sin.sin_addr));
178 if (!IN_MULTICAST(sin.sin_addr.s_addr)) {
179 struct ifaddr *ifa;
180 ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
181 if (ifa == NULL &&
182 (inp->inp_flags & IN6P_BINDANY) == 0) {
183 error = EADDRNOTAVAIL;
184 goto out;
185 }
186 }
187 }
188 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
189 // succeed
190 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
191 struct ifaddr *ifa = NULL;
192
193 if ((inp->inp_flags & IN6P_FAITH) == 0) {
194 ifa = ifa_ifwithaddr(sin6tosa(sin6));
195 if (ifa == NULL &&
196 (inp->inp_flags & IN6P_BINDANY) == 0) {
197 error = EADDRNOTAVAIL;
198 goto out;
199 }
200 }
201
202 /*
203 * bind to an anycast address might accidentally
204 * cause sending a packet with an anycast source
205 * address, so we forbid it.
206 *
207 * We should allow to bind to a deprecated address,
208 * since the application dare to use it.
209 * But, can we assume that they are careful enough
210 * to check if the address is deprecated or not?
211 * Maybe, as a safeguard, we should have a setsockopt
212 * flag to control the bind(2) behavior against
213 * deprecated addresses (default: forbid bind(2)).
214 */
215 if (ifa &&
216 ifatoia6(ifa)->ia6_flags &
217 (IN6_IFF_ANYCAST | IN6_IFF_DUPLICATED)) {
218 error = EADDRNOTAVAIL;
219 goto out;
220 }
221 }
222 in6p_laddr(inp) = sin6->sin6_addr;
223 error = 0;
224 out:
225 pserialize_read_exit(s);
226 return error;
227 }
228
229 /*
230 * Bind port from sin6 to inp.
231 */
232 static int
in6pcb_bind_port(struct inpcb * inp,struct sockaddr_in6 * sin6,struct lwp * l)233 in6pcb_bind_port(struct inpcb *inp, struct sockaddr_in6 *sin6, struct lwp *l)
234 {
235 struct inpcbtable *table = inp->inp_table;
236 struct socket *so = inp->inp_socket;
237 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
238 int error;
239
240 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
241 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
242 (so->so_options & SO_ACCEPTCONN) == 0))
243 wild = 1;
244
245 if (sin6->sin6_port != 0) {
246 enum kauth_network_req req;
247
248 #ifndef IPNOPRIVPORTS
249 if (ntohs(sin6->sin6_port) < IPV6PORT_RESERVED)
250 req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
251 else
252 #endif /* IPNOPRIVPORTS */
253 req = KAUTH_REQ_NETWORK_BIND_PORT;
254
255 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_BIND,
256 req, so, sin6, NULL);
257 if (error)
258 return EACCES;
259 }
260
261 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
262 /*
263 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
264 * allow compepte duplication of binding if
265 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
266 * and a multicast address is bound on both
267 * new and duplicated sockets.
268 */
269 if (so->so_options & (SO_REUSEADDR | SO_REUSEPORT))
270 reuseport = SO_REUSEADDR|SO_REUSEPORT;
271 }
272
273 if (sin6->sin6_port != 0) {
274 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
275 #ifdef INET
276 struct inpcb *t;
277 struct vestigial_inpcb vestige;
278
279 t = inpcb_lookup_local(table,
280 *(struct in_addr *)&sin6->sin6_addr.s6_addr32[3],
281 sin6->sin6_port, wild, &vestige);
282 if (t && (reuseport & t->inp_socket->so_options) == 0)
283 return EADDRINUSE;
284 if (!t
285 && vestige.valid
286 && !(reuseport && vestige.reuse_port))
287 return EADDRINUSE;
288 #else
289 return EADDRNOTAVAIL;
290 #endif
291 }
292
293 {
294 struct inpcb *t;
295 struct vestigial_inpcb vestige;
296
297 t = in6pcb_lookup_local(table, &sin6->sin6_addr,
298 sin6->sin6_port, wild, &vestige);
299 if (t && (reuseport & t->inp_socket->so_options) == 0)
300 return EADDRINUSE;
301 if (!t
302 && vestige.valid
303 && !(reuseport && vestige.reuse_port))
304 return EADDRINUSE;
305 }
306 }
307
308 if (sin6->sin6_port == 0) {
309 int e;
310 e = in6pcb_set_port(sin6, inp, l);
311 if (e != 0)
312 return e;
313 } else {
314 inp->inp_lport = sin6->sin6_port;
315 inpcb_set_state(inp, INP_BOUND);
316 }
317
318 LIST_REMOVE(inp, inp_lhash);
319 LIST_INSERT_HEAD(IN6PCBHASH_PORT(table, inp->inp_lport),
320 inp, inp_lhash);
321
322 return 0;
323 }
324
325 int
in6pcb_bind(void * v,struct sockaddr_in6 * sin6,struct lwp * l)326 in6pcb_bind(void *v, struct sockaddr_in6 *sin6, struct lwp *l)
327 {
328 struct inpcb *inp = v;
329 struct sockaddr_in6 lsin6;
330 int error;
331
332 if (inp->inp_af != AF_INET6)
333 return EINVAL;
334
335 /*
336 * If we already have a local port or a local address it means we're
337 * bounded.
338 */
339 if (inp->inp_lport || !(IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) ||
340 (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)) &&
341 in6p_laddr(inp).s6_addr32[3] == 0)))
342 return EINVAL;
343
344 if (NULL != sin6) {
345 /* We were provided a sockaddr_in6 to use. */
346 if (sin6->sin6_len != sizeof(*sin6))
347 return EINVAL;
348 } else {
349 /* We always bind to *something*, even if it's "anything". */
350 lsin6 = *((const struct sockaddr_in6 *)
351 inp->inp_socket->so_proto->pr_domain->dom_sa_any);
352 sin6 = &lsin6;
353 }
354
355 /* Bind address. */
356 error = in6pcb_bind_addr(inp, sin6, l);
357 if (error)
358 return error;
359
360 /* Bind port. */
361 error = in6pcb_bind_port(inp, sin6, l);
362 if (error) {
363 /*
364 * Reset the address here to "any" so we don't "leak" the
365 * inpcb.
366 */
367 in6p_laddr(inp) = in6addr_any;
368
369 return error;
370 }
371
372
373 #if 0
374 in6p_flowinfo(inp) = 0; /* XXX */
375 #endif
376 return 0;
377 }
378
379 /*
380 * Connect from a socket to a specified address.
381 * Both address and port must be specified in argument sin6.
382 * If don't have a local address for this socket yet,
383 * then pick one.
384 */
385 int
in6pcb_connect(void * v,struct sockaddr_in6 * sin6,struct lwp * l)386 in6pcb_connect(void *v, struct sockaddr_in6 *sin6, struct lwp *l)
387 {
388 struct inpcb *inp = v;
389 struct in6_addr *in6a = NULL;
390 struct in6_addr ia6;
391 struct ifnet *ifp = NULL; /* outgoing interface */
392 int error = 0;
393 int scope_ambiguous = 0;
394 #ifdef INET
395 struct in6_addr mapped;
396 #endif
397 struct sockaddr_in6 tmp;
398 struct vestigial_inpcb vestige;
399 struct psref psref;
400 int bound;
401
402 (void)&in6a; /* XXX fool gcc */
403
404 if (inp->inp_af != AF_INET6)
405 return EINVAL;
406
407 if (sin6->sin6_len != sizeof(*sin6))
408 return EINVAL;
409 if (sin6->sin6_family != AF_INET6)
410 return EAFNOSUPPORT;
411 if (sin6->sin6_port == 0)
412 return EADDRNOTAVAIL;
413
414 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
415 inp->inp_socket->so_type == SOCK_STREAM)
416 return EADDRNOTAVAIL;
417
418 if (sin6->sin6_scope_id == 0 && !ip6_use_defzone)
419 scope_ambiguous = 1;
420 if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
421 return error;
422
423 /* sanity check for mapped address case */
424 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
425 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
426 return EINVAL;
427 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)))
428 in6p_laddr(inp).s6_addr16[5] = htons(0xffff);
429 if (!IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)))
430 return EINVAL;
431 } else
432 {
433 if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)))
434 return EINVAL;
435 }
436
437 /* protect *sin6 from overwrites */
438 tmp = *sin6;
439 sin6 = &tmp;
440
441 bound = curlwp_bind();
442 /* Source address selection. */
443 if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)) &&
444 in6p_laddr(inp).s6_addr32[3] == 0) {
445 #ifdef INET
446 struct sockaddr_in sin;
447 struct in_ifaddr *ia4;
448 struct psref _psref;
449
450 memset(&sin, 0, sizeof(sin));
451 sin.sin_len = sizeof(sin);
452 sin.sin_family = AF_INET;
453 memcpy(&sin.sin_addr, &sin6->sin6_addr.s6_addr32[3],
454 sizeof(sin.sin_addr));
455 ia4 = in_selectsrc(&sin, &inp->inp_route,
456 inp->inp_socket->so_options, NULL, &error, &_psref);
457 if (ia4 == NULL) {
458 if (error == 0)
459 error = EADDRNOTAVAIL;
460 curlwp_bindx(bound);
461 return error;
462 }
463 memset(&mapped, 0, sizeof(mapped));
464 mapped.s6_addr16[5] = htons(0xffff);
465 memcpy(&mapped.s6_addr32[3], &IA_SIN(ia4)->sin_addr,
466 sizeof(IA_SIN(ia4)->sin_addr));
467 ia4_release(ia4, &_psref);
468 in6a = &mapped;
469 #else
470 curlwp_bindx(bound);
471 return EADDRNOTAVAIL;
472 #endif
473 } else {
474 /*
475 * XXX: in6_selectsrc might replace the bound local address
476 * with the address specified by setsockopt(IPV6_PKTINFO).
477 * Is it the intended behavior?
478 */
479 error = in6_selectsrc(sin6, in6p_outputopts(inp),
480 in6p_moptions(inp), &inp->inp_route, &in6p_laddr(inp),
481 &ifp, &psref, &ia6);
482 if (error == 0)
483 in6a = &ia6;
484 if (ifp && scope_ambiguous &&
485 (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) {
486 if_put(ifp, &psref);
487 curlwp_bindx(bound);
488 return error;
489 }
490
491 if (in6a == NULL) {
492 if_put(ifp, &psref);
493 curlwp_bindx(bound);
494 if (error == 0)
495 error = EADDRNOTAVAIL;
496 return error;
497 }
498 }
499
500 if (ifp != NULL) {
501 in6p_ip6(inp).ip6_hlim = (u_int8_t)in6pcb_selecthlim(inp, ifp);
502 if_put(ifp, &psref);
503 } else
504 in6p_ip6(inp).ip6_hlim = (u_int8_t)in6pcb_selecthlim_rt(inp);
505 curlwp_bindx(bound);
506
507 if (in6pcb_lookup(inp->inp_table, &sin6->sin6_addr,
508 sin6->sin6_port,
509 IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) ? in6a : &in6p_laddr(inp),
510 inp->inp_lport, 0, &vestige)
511 || vestige.valid)
512 return EADDRINUSE;
513 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) ||
514 (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp)) &&
515 in6p_laddr(inp).s6_addr32[3] == 0))
516 {
517 if (inp->inp_lport == 0) {
518 error = in6pcb_bind(inp, NULL, l);
519 if (error != 0)
520 return error;
521 }
522 in6p_laddr(inp) = *in6a;
523 }
524 in6p_faddr(inp) = sin6->sin6_addr;
525 inp->inp_fport = sin6->sin6_port;
526
527 /* Late bind, if needed */
528 if (inp->inp_bindportonsend) {
529 struct sockaddr_in6 lsin = *((const struct sockaddr_in6 *)
530 inp->inp_socket->so_proto->pr_domain->dom_sa_any);
531 lsin.sin6_addr = in6p_laddr(inp);
532 lsin.sin6_port = 0;
533
534 if ((error = in6pcb_bind_port(inp, &lsin, l)) != 0)
535 return error;
536 }
537
538 inpcb_set_state(inp, INP_CONNECTED);
539 in6p_flowinfo(inp) &= ~IPV6_FLOWLABEL_MASK;
540 if (ip6_auto_flowlabel)
541 in6p_flowinfo(inp) |=
542 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
543 #if defined(IPSEC)
544 if (ipsec_enabled && inp->inp_socket->so_type == SOCK_STREAM)
545 ipsec_pcbconn(inp->inp_sp);
546 #endif
547 return 0;
548 }
549
550 void
in6pcb_disconnect(struct inpcb * inp)551 in6pcb_disconnect(struct inpcb *inp)
552 {
553 memset((void *)&in6p_faddr(inp), 0, sizeof(in6p_faddr(inp)));
554 inp->inp_fport = 0;
555 inpcb_set_state(inp, INP_BOUND);
556 in6p_flowinfo(inp) &= ~IPV6_FLOWLABEL_MASK;
557 #if defined(IPSEC)
558 if (ipsec_enabled)
559 ipsec_pcbdisconn(inp->inp_sp);
560 #endif
561 if (inp->inp_socket->so_state & SS_NOFDREF)
562 inpcb_destroy(inp);
563 }
564
565 void
in6pcb_fetch_sockaddr(struct inpcb * inp,struct sockaddr_in6 * sin6)566 in6pcb_fetch_sockaddr(struct inpcb *inp, struct sockaddr_in6 *sin6)
567 {
568
569 if (inp->inp_af != AF_INET6)
570 return;
571
572 sockaddr_in6_init(sin6, &in6p_laddr(inp), inp->inp_lport, 0, 0);
573 (void)sa6_recoverscope(sin6); /* XXX: should catch errors */
574 }
575
576 void
in6pcb_fetch_peeraddr(struct inpcb * inp,struct sockaddr_in6 * sin6)577 in6pcb_fetch_peeraddr(struct inpcb *inp, struct sockaddr_in6 *sin6)
578 {
579
580 if (inp->inp_af != AF_INET6)
581 return;
582
583 sockaddr_in6_init(sin6, &in6p_faddr(inp), inp->inp_fport, 0, 0);
584 (void)sa6_recoverscope(sin6); /* XXX: should catch errors */
585 }
586
587 /*
588 * Pass some notification to all connections of a protocol
589 * associated with address dst. The local address and/or port numbers
590 * may be specified to limit the search. The "usual action" will be
591 * taken, depending on the ctlinput cmd. The caller must filter any
592 * cmds that are uninteresting (e.g., no error in the map).
593 * Call the protocol specific routine (if any) to report
594 * any errors for each matching socket.
595 *
596 * Must be called at splsoftnet.
597 *
598 * Note: src (4th arg) carries the flowlabel value on the original IPv6
599 * header, in sin6_flowinfo member.
600 */
601 int
in6pcb_notify(struct inpcbtable * table,const struct sockaddr * dst,u_int fport_arg,const struct sockaddr * src,u_int lport_arg,int cmd,void * cmdarg,void (* notify)(struct inpcb *,int))602 in6pcb_notify(struct inpcbtable *table, const struct sockaddr *dst,
603 u_int fport_arg, const struct sockaddr *src, u_int lport_arg, int cmd,
604 void *cmdarg, void (*notify)(struct inpcb *, int))
605 {
606 struct inpcb *inp;
607 struct sockaddr_in6 sa6_src;
608 const struct sockaddr_in6 *sa6_dst;
609 in_port_t fport = fport_arg, lport = lport_arg;
610 int errno;
611 int nmatch = 0;
612 u_int32_t flowinfo;
613
614 if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
615 return 0;
616
617 sa6_dst = (const struct sockaddr_in6 *)dst;
618 if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
619 return 0;
620
621 /*
622 * note that src can be NULL when we get notify by local fragmentation.
623 */
624 sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
625 flowinfo = sa6_src.sin6_flowinfo;
626
627 /*
628 * Redirects go to all references to the destination,
629 * and use in6pcb_rtchange to invalidate the route cache.
630 * Dead host indications: also use in6pcb_rtchange to invalidate
631 * the cache, and deliver the error to all the sockets.
632 * Otherwise, if we have knowledge of the local port and address,
633 * deliver only to that socket.
634 */
635 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
636 fport = 0;
637 lport = 0;
638 memset((void *)&sa6_src.sin6_addr, 0, sizeof(sa6_src.sin6_addr));
639
640 if (cmd != PRC_HOSTDEAD)
641 notify = in6pcb_rtchange;
642 }
643
644 errno = inet6ctlerrmap[cmd];
645 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
646 struct rtentry *rt = NULL;
647
648 if (inp->inp_af != AF_INET6)
649 continue;
650
651 /*
652 * Under the following condition, notify of redirects
653 * to the pcb, without making address matches against inpcb.
654 * - redirect notification is arrived.
655 * - the inpcb is unconnected.
656 * - the inpcb is caching !RTF_HOST routing entry.
657 * - the ICMPv6 notification is from the gateway cached in the
658 * inpcb. i.e. ICMPv6 notification is from nexthop gateway
659 * the inpcb used very recently.
660 *
661 * This is to improve interaction between netbsd/openbsd
662 * redirect handling code, and inpcb route cache code.
663 * without the clause, !RTF_HOST routing entry (which carries
664 * gateway used by inpcb right before the ICMPv6 redirect)
665 * will be cached forever in unconnected inpcb.
666 *
667 * There still is a question regarding to what is TRT:
668 * - On bsdi/freebsd, RTF_HOST (cloned) routing entry will be
669 * generated on packet output. inpcb will always cache
670 * RTF_HOST routing entry so there's no need for the clause
671 * (ICMPv6 redirect will update RTF_HOST routing entry,
672 * and inpcb is caching it already).
673 * However, bsdi/freebsd are vulnerable to local DoS attacks
674 * due to the cloned routing entries.
675 * - Specwise, "destination cache" is mentioned in RFC2461.
676 * Jinmei says that it implies bsdi/freebsd behavior, itojun
677 * is not really convinced.
678 * - Having hiwat/lowat on # of cloned host route (redirect/
679 * pmtud) may be a good idea. netbsd/openbsd has it. see
680 * icmp6_mtudisc_update().
681 */
682 if ((PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) &&
683 IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)) &&
684 (rt = rtcache_validate(&inp->inp_route)) != NULL &&
685 !(rt->rt_flags & RTF_HOST)) {
686 const struct sockaddr_in6 *dst6;
687
688 dst6 = (const struct sockaddr_in6 *)
689 rtcache_getdst(&inp->inp_route);
690 if (dst6 == NULL)
691 ;
692 else if (IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr,
693 &sa6_dst->sin6_addr)) {
694 rtcache_unref(rt, &inp->inp_route);
695 goto do_notify;
696 }
697 }
698 rtcache_unref(rt, &inp->inp_route);
699
700 /*
701 * If the error designates a new path MTU for a destination
702 * and the application (associated with this socket) wanted to
703 * know the value, notify. Note that we notify for all
704 * disconnected sockets if the corresponding application
705 * wanted. This is because some UDP applications keep sending
706 * sockets disconnected.
707 * XXX: should we avoid to notify the value to TCP sockets?
708 */
709 if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
710 (IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp)) ||
711 IN6_ARE_ADDR_EQUAL(&in6p_faddr(inp), &sa6_dst->sin6_addr))) {
712 ip6_notify_pmtu(inp, (const struct sockaddr_in6 *)dst,
713 (u_int32_t *)cmdarg);
714 }
715
716 /*
717 * Detect if we should notify the error. If no source and
718 * destination ports are specified, but non-zero flowinfo and
719 * local address match, notify the error. This is the case
720 * when the error is delivered with an encrypted buffer
721 * by ESP. Otherwise, just compare addresses and ports
722 * as usual.
723 */
724 if (lport == 0 && fport == 0 && flowinfo &&
725 inp->inp_socket != NULL &&
726 flowinfo == (in6p_flowinfo(inp) & IPV6_FLOWLABEL_MASK) &&
727 IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), &sa6_src.sin6_addr))
728 goto do_notify;
729 else if (!IN6_ARE_ADDR_EQUAL(&in6p_faddr(inp),
730 &sa6_dst->sin6_addr) ||
731 inp->inp_socket == NULL ||
732 (lport && inp->inp_lport != lport) ||
733 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
734 !IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp),
735 &sa6_src.sin6_addr)) ||
736 (fport && inp->inp_fport != fport))
737 continue;
738
739 do_notify:
740 if (notify)
741 (*notify)(inp, errno);
742 nmatch++;
743 }
744 return nmatch;
745 }
746
747 void
in6pcb_purgeif0(struct inpcbtable * table,struct ifnet * ifp)748 in6pcb_purgeif0(struct inpcbtable *table, struct ifnet *ifp)
749 {
750 struct inpcb *inp;
751 struct ip6_moptions *im6o;
752 struct in6_multi_mship *imm, *nimm;
753
754 KASSERT(ifp != NULL);
755
756 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
757 bool need_unlock = false;
758 if (inp->inp_af != AF_INET6)
759 continue;
760
761 /* The caller holds either one of inps' lock */
762 if (!inp_locked(inp)) {
763 inp_lock(inp);
764 need_unlock = true;
765 }
766 im6o = in6p_moptions(inp);
767 if (im6o) {
768 /*
769 * Unselect the outgoing interface if it is being
770 * detached.
771 */
772 if (im6o->im6o_multicast_if_index == ifp->if_index)
773 im6o->im6o_multicast_if_index = 0;
774
775 /*
776 * Drop multicast group membership if we joined
777 * through the interface being detached.
778 * XXX controversial - is it really legal for kernel
779 * to force this?
780 */
781 LIST_FOREACH_SAFE(imm, &im6o->im6o_memberships,
782 i6mm_chain, nimm) {
783 if (imm->i6mm_maddr->in6m_ifp == ifp) {
784 LIST_REMOVE(imm, i6mm_chain);
785 in6_leavegroup(imm);
786 }
787 }
788 }
789
790 in_purgeifmcast(inp->inp_moptions, ifp);
791
792 if (need_unlock)
793 inp_unlock(inp);
794 }
795 }
796
797 void
in6pcb_purgeif(struct inpcbtable * table,struct ifnet * ifp)798 in6pcb_purgeif(struct inpcbtable *table, struct ifnet *ifp)
799 {
800 struct rtentry *rt;
801 struct inpcb *inp;
802
803 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
804 if (inp->inp_af != AF_INET6)
805 continue;
806 if ((rt = rtcache_validate(&inp->inp_route)) != NULL &&
807 rt->rt_ifp == ifp) {
808 rtcache_unref(rt, &inp->inp_route);
809 in6pcb_rtchange(inp, 0);
810 } else
811 rtcache_unref(rt, &inp->inp_route);
812 }
813 }
814
815 /*
816 * After a routing change, flush old routing. A new route can be
817 * allocated the next time output is attempted.
818 */
819 void
in6pcb_rtchange(struct inpcb * inp,int errno)820 in6pcb_rtchange(struct inpcb *inp, int errno)
821 {
822 if (inp->inp_af != AF_INET6)
823 return;
824
825 rtcache_free(&inp->inp_route);
826 /*
827 * A new route can be allocated the next time
828 * output is attempted.
829 */
830 }
831
832 struct inpcb *
in6pcb_lookup_local(struct inpcbtable * table,struct in6_addr * laddr6,u_int lport_arg,int lookup_wildcard,struct vestigial_inpcb * vp)833 in6pcb_lookup_local(struct inpcbtable *table, struct in6_addr *laddr6,
834 u_int lport_arg, int lookup_wildcard, struct vestigial_inpcb *vp)
835 {
836 struct inpcbhead *head;
837 struct inpcb *inp, *match = NULL;
838 int matchwild = 3, wildcard;
839 in_port_t lport = lport_arg;
840
841 if (vp)
842 vp->valid = 0;
843
844 head = IN6PCBHASH_PORT(table, lport);
845 LIST_FOREACH(inp, head, inp_lhash) {
846 if (inp->inp_af != AF_INET6)
847 continue;
848
849 if (inp->inp_lport != lport)
850 continue;
851 wildcard = 0;
852 if (IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp))) {
853 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
854 continue;
855 }
856 if (!IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp)))
857 wildcard++;
858 if (IN6_IS_ADDR_V4MAPPED(&in6p_laddr(inp))) {
859 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
860 continue;
861 if (!IN6_IS_ADDR_V4MAPPED(laddr6))
862 continue;
863
864 /* duplicate of IPv4 logic */
865 wildcard = 0;
866 if (IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp)) &&
867 in6p_faddr(inp).s6_addr32[3])
868 wildcard++;
869 if (!in6p_laddr(inp).s6_addr32[3]) {
870 if (laddr6->s6_addr32[3])
871 wildcard++;
872 } else {
873 if (!laddr6->s6_addr32[3])
874 wildcard++;
875 else {
876 if (in6p_laddr(inp).s6_addr32[3] !=
877 laddr6->s6_addr32[3])
878 continue;
879 }
880 }
881 } else if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp))) {
882 if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
883 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
884 continue;
885 }
886 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
887 wildcard++;
888 } else {
889 if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
890 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
891 continue;
892 }
893 if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
894 wildcard++;
895 else {
896 if (!IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp),
897 laddr6))
898 continue;
899 }
900 }
901 if (wildcard && !lookup_wildcard)
902 continue;
903 if (wildcard < matchwild) {
904 match = inp;
905 matchwild = wildcard;
906 if (matchwild == 0)
907 break;
908 }
909 }
910 if (match && matchwild == 0)
911 return match;
912
913 if (vp && table->vestige && table->vestige->init_ports6) {
914 struct vestigial_inpcb better;
915 bool has_better = false;
916 void *state;
917
918 state = (*table->vestige->init_ports6)(laddr6,
919 lport_arg,
920 lookup_wildcard);
921 while (table->vestige
922 && (*table->vestige->next_port6)(state, vp)) {
923
924 if (vp->lport != lport)
925 continue;
926 wildcard = 0;
927 if (!IN6_IS_ADDR_UNSPECIFIED(&vp->faddr.v6))
928 wildcard++;
929 if (IN6_IS_ADDR_UNSPECIFIED(&vp->laddr.v6)) {
930 if (!IN6_IS_ADDR_UNSPECIFIED(laddr6))
931 wildcard++;
932 } else {
933 if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
934 if (vp->v6only)
935 continue;
936 }
937 if (IN6_IS_ADDR_UNSPECIFIED(laddr6))
938 wildcard++;
939 else {
940 if (!IN6_ARE_ADDR_EQUAL(&vp->laddr.v6, laddr6))
941 continue;
942 }
943 }
944 if (wildcard && !lookup_wildcard)
945 continue;
946 if (wildcard < matchwild) {
947 better = *vp;
948 has_better = true;
949
950 matchwild = wildcard;
951 if (matchwild == 0)
952 break;
953 }
954 }
955
956 if (has_better) {
957 *vp = better;
958 return 0;
959 }
960 }
961 return match;
962 }
963
964 /*
965 * WARNING: return value (rtentry) could be IPv4 one if inpcb is connected to
966 * IPv4 mapped address.
967 */
968 struct rtentry *
in6pcb_rtentry(struct inpcb * inp)969 in6pcb_rtentry(struct inpcb *inp)
970 {
971 struct rtentry *rt;
972 struct route *ro;
973 union {
974 const struct sockaddr *sa;
975 const struct sockaddr_in6 *sa6;
976 #ifdef INET
977 const struct sockaddr_in *sa4;
978 #endif
979 } cdst;
980
981 ro = &inp->inp_route;
982
983 if (inp->inp_af != AF_INET6)
984 return NULL;
985
986 cdst.sa = rtcache_getdst(ro);
987 if (cdst.sa == NULL)
988 ;
989 #ifdef INET
990 else if (cdst.sa->sa_family == AF_INET) {
991 KASSERT(IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp)));
992 if (cdst.sa4->sin_addr.s_addr != in6p_faddr(inp).s6_addr32[3])
993 rtcache_free(ro);
994 }
995 #endif
996 else {
997 if (!IN6_ARE_ADDR_EQUAL(&cdst.sa6->sin6_addr,
998 &in6p_faddr(inp)))
999 rtcache_free(ro);
1000 }
1001 if ((rt = rtcache_validate(ro)) == NULL)
1002 rt = rtcache_update(ro, 1);
1003 #ifdef INET
1004 if (rt == NULL && IN6_IS_ADDR_V4MAPPED(&in6p_faddr(inp))) {
1005 union {
1006 struct sockaddr dst;
1007 struct sockaddr_in dst4;
1008 } u;
1009 struct in_addr addr;
1010
1011 addr.s_addr = in6p_faddr(inp).s6_addr32[3];
1012
1013 sockaddr_in_init(&u.dst4, &addr, 0);
1014 if (rtcache_setdst(ro, &u.dst) != 0)
1015 return NULL;
1016
1017 rt = rtcache_init(ro);
1018 } else
1019 #endif
1020 if (rt == NULL && !IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp))) {
1021 union {
1022 struct sockaddr dst;
1023 struct sockaddr_in6 dst6;
1024 } u;
1025
1026 sockaddr_in6_init(&u.dst6, &in6p_faddr(inp), 0, 0, 0);
1027 if (rtcache_setdst(ro, &u.dst) != 0)
1028 return NULL;
1029
1030 rt = rtcache_init(ro);
1031 }
1032 return rt;
1033 }
1034
1035 void
in6pcb_rtentry_unref(struct rtentry * rt,struct inpcb * inp)1036 in6pcb_rtentry_unref(struct rtentry *rt, struct inpcb *inp)
1037 {
1038
1039 rtcache_unref(rt, &inp->inp_route);
1040 }
1041
1042 struct inpcb *
in6pcb_lookup(struct inpcbtable * table,const struct in6_addr * faddr6,u_int fport_arg,const struct in6_addr * laddr6,u_int lport_arg,int faith,struct vestigial_inpcb * vp)1043 in6pcb_lookup(struct inpcbtable *table, const struct in6_addr *faddr6,
1044 u_int fport_arg, const struct in6_addr *laddr6, u_int lport_arg,
1045 int faith,
1046 struct vestigial_inpcb *vp)
1047 {
1048 struct inpcbhead *head;
1049 struct inpcb *inp;
1050 in_port_t fport = fport_arg, lport = lport_arg;
1051
1052 if (vp)
1053 vp->valid = 0;
1054
1055 head = IN6PCBHASH_CONNECT(table, faddr6, fport, laddr6, lport);
1056 LIST_FOREACH(inp, head, inp_hash) {
1057 if (inp->inp_af != AF_INET6)
1058 continue;
1059
1060 /* find exact match on both source and dest */
1061 if (inp->inp_fport != fport)
1062 continue;
1063 if (inp->inp_lport != lport)
1064 continue;
1065 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_faddr(inp)))
1066 continue;
1067 if (!IN6_ARE_ADDR_EQUAL(&in6p_faddr(inp), faddr6))
1068 continue;
1069 if (IN6_IS_ADDR_UNSPECIFIED(&in6p_laddr(inp)))
1070 continue;
1071 if (!IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), laddr6))
1072 continue;
1073 if ((IN6_IS_ADDR_V4MAPPED(laddr6) ||
1074 IN6_IS_ADDR_V4MAPPED(faddr6)) &&
1075 (inp->inp_flags & IN6P_IPV6_V6ONLY))
1076 continue;
1077 return inp;
1078 }
1079 if (vp && table->vestige) {
1080 if ((*table->vestige->lookup6)(faddr6, fport_arg,
1081 laddr6, lport_arg, vp))
1082 return NULL;
1083 }
1084
1085 return NULL;
1086 }
1087
1088 struct inpcb *
in6pcb_lookup_bound(struct inpcbtable * table,const struct in6_addr * laddr6,u_int lport_arg,int faith)1089 in6pcb_lookup_bound(struct inpcbtable *table, const struct in6_addr *laddr6,
1090 u_int lport_arg, int faith)
1091 {
1092 struct inpcbhead *head;
1093 struct inpcb *inp;
1094 in_port_t lport = lport_arg;
1095 #ifdef INET
1096 struct in6_addr zero_mapped;
1097 #endif
1098
1099 head = IN6PCBHASH_BIND(table, laddr6, lport);
1100 LIST_FOREACH(inp, head, inp_hash) {
1101 if (inp->inp_af != AF_INET6)
1102 continue;
1103
1104 if (faith && (inp->inp_flags & IN6P_FAITH) == 0)
1105 continue;
1106 if (inp->inp_fport != 0)
1107 continue;
1108 if (inp->inp_lport != lport)
1109 continue;
1110 if (IN6_IS_ADDR_V4MAPPED(laddr6) &&
1111 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
1112 continue;
1113 if (IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), laddr6))
1114 goto out;
1115 }
1116 #ifdef INET
1117 if (IN6_IS_ADDR_V4MAPPED(laddr6)) {
1118 memset(&zero_mapped, 0, sizeof(zero_mapped));
1119 zero_mapped.s6_addr16[5] = 0xffff;
1120 head = IN6PCBHASH_BIND(table, &zero_mapped, lport);
1121 LIST_FOREACH(inp, head, inp_hash) {
1122 if (inp->inp_af != AF_INET6)
1123 continue;
1124
1125 if (faith && (inp->inp_flags & IN6P_FAITH) == 0)
1126 continue;
1127 if (inp->inp_fport != 0)
1128 continue;
1129 if (inp->inp_lport != lport)
1130 continue;
1131 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
1132 continue;
1133 if (IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), &zero_mapped))
1134 goto out;
1135 }
1136 }
1137 #endif
1138 head = IN6PCBHASH_BIND(table, &zeroin6_addr, lport);
1139 LIST_FOREACH(inp, head, inp_hash) {
1140 if (inp->inp_af != AF_INET6)
1141 continue;
1142
1143 if (faith && (inp->inp_flags & IN6P_FAITH) == 0)
1144 continue;
1145 if (inp->inp_fport != 0)
1146 continue;
1147 if (inp->inp_lport != lport)
1148 continue;
1149 if (IN6_IS_ADDR_V4MAPPED(laddr6) &&
1150 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
1151 continue;
1152 if (IN6_ARE_ADDR_EQUAL(&in6p_laddr(inp), &zeroin6_addr))
1153 goto out;
1154 }
1155 return NULL;
1156
1157 out:
1158 if (inp != LIST_FIRST(head)) {
1159 LIST_REMOVE(inp, inp_hash);
1160 LIST_INSERT_HEAD(head, inp, inp_hash);
1161 }
1162 return inp;
1163 }
1164
1165 void
in6pcb_set_state(struct inpcb * inp,int state)1166 in6pcb_set_state(struct inpcb *inp, int state)
1167 {
1168
1169 if (inp->inp_af != AF_INET6)
1170 return;
1171
1172 if (inp->inp_state > INP_ATTACHED)
1173 LIST_REMOVE(inp, inp_hash);
1174
1175 switch (state) {
1176 case INP_BOUND:
1177 LIST_INSERT_HEAD(IN6PCBHASH_BIND(inp->inp_table,
1178 &in6p_laddr(inp), inp->inp_lport), inp,
1179 inp_hash);
1180 break;
1181 case INP_CONNECTED:
1182 LIST_INSERT_HEAD(IN6PCBHASH_CONNECT(inp->inp_table,
1183 &in6p_faddr(inp), inp->inp_fport,
1184 &in6p_laddr(inp), inp->inp_lport), inp,
1185 inp_hash);
1186 break;
1187 }
1188
1189 inp->inp_state = state;
1190 }
1191