xref: /dragonfly/sys/netinet/udp_usrreq.c (revision 2513f15e)
1 /*
2  * Copyright (c) 2004 Jeffrey M. Hsu.  All rights reserved.
3  * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Jeffrey M. Hsu.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of The DragonFly Project nor the names of its
17  *    contributors may be used to endorse or promote products derived
18  *    from this software without specific, prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
63  * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.64.2.18 2003/01/24 05:11:34 sam Exp $
64  */
65 
66 #include "opt_ipsec.h"
67 #include "opt_inet6.h"
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/domain.h>
75 #include <sys/proc.h>
76 #include <sys/priv.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/sysctl.h>
81 #include <sys/syslog.h>
82 #include <sys/in_cksum.h>
83 #include <sys/ktr.h>
84 
85 #include <sys/thread2.h>
86 #include <sys/socketvar2.h>
87 #include <sys/serialize.h>
88 
89 #include <machine/stdarg.h>
90 
91 #include <net/if.h>
92 #include <net/route.h>
93 #include <net/netmsg2.h>
94 #include <net/netisr2.h>
95 
96 #include <netinet/in.h>
97 #include <netinet/in_systm.h>
98 #include <netinet/ip.h>
99 #ifdef INET6
100 #include <netinet/ip6.h>
101 #endif
102 #include <netinet/in_pcb.h>
103 #include <netinet/in_var.h>
104 #include <netinet/ip_var.h>
105 #ifdef INET6
106 #include <netinet6/ip6_var.h>
107 #endif
108 #include <netinet/ip_icmp.h>
109 #include <netinet/icmp_var.h>
110 #include <netinet/udp.h>
111 #include <netinet/udp_var.h>
112 
113 #ifdef FAST_IPSEC
114 #include <netproto/ipsec/ipsec.h>
115 #endif
116 
117 #ifdef IPSEC
118 #include <netinet6/ipsec.h>
119 #endif
120 
/*
 * Kernel trace (KTR) event definitions for the UDP output path.  Every
 * event defined here logs exactly one argument: the inpcb pointer.
 */
121 #define UDP_KTR_STRING		"inp=%p"
122 #define UDP_KTR_ARGS		struct inpcb *inp
123 
/* Fall back to the KTR_ALL mask when the build does not define KTR_UDP. */
124 #ifndef KTR_UDP
125 #define KTR_UDP			KTR_ALL
126 #endif
127 
/* Events emitted by udp_send(): entry, exit and just before ip_output(). */
128 KTR_INFO_MASTER(udp);
129 KTR_INFO(KTR_UDP, udp, output_beg, 0, UDP_KTR_STRING, UDP_KTR_ARGS);
130 KTR_INFO(KTR_UDP, udp, output_end, 1, UDP_KTR_STRING, UDP_KTR_ARGS);
131 KTR_INFO(KTR_UDP, udp, ip_output, 2, UDP_KTR_STRING, UDP_KTR_ARGS);
132 
/* Log the trace event udp_<name> for the given inpcb. */
133 #define logudp(name, inp)	KTR_LOG(udp_##name, inp)
134 
135 /*
136  * UDP protocol implementation.
137  * Per RFC 768, August, 1980.
138  */
/* Non-zero: udp_send() fills in a checksum for outgoing datagrams. */
139 #ifndef	COMPAT_42
140 static int	udpcksum = 1;
141 #else
142 static int	udpcksum = 0;		/* XXX */
143 #endif
144 SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
145     &udpcksum, 0, "Enable checksumming of UDP packets");
146 
/* Non-zero: udp_input() logs datagrams for which no pcb was found. */
147 int	log_in_vain = 0;
148 SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
149     &log_in_vain, 0, "Log all incoming UDP packets");
150 
/* Non-zero: udp_input() drops unmatched datagrams without an ICMP reply. */
151 static int	blackhole = 0;
152 SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
153 	&blackhole, 0, "Do not send port unreachables for refused connects");
154 
/* Non-zero: check_multicast_membership() enforces group membership. */
155 static int	strict_mcast_mship = 1;
156 SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
157 	&strict_mcast_mship, 0, "Only send multicast to member sockets");
158 
/* NOTE(review): not referenced in this part of the file; consumer is elsewhere. */
159 int	udp_sosend_async = 1;
160 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_async, CTLFLAG_RW,
161 	&udp_sosend_async, 0, "UDP asynchronized pru_send");
162 
/* NOTE(review): not referenced in this part of the file; consumer is elsewhere. */
163 int	udp_sosend_prepend = 1;
164 SYSCTL_INT(_net_inet_udp, OID_AUTO, sosend_prepend, CTLFLAG_RW,
165 	&udp_sosend_prepend, 0,
166 	"Prepend enough space for proto and link header in pru_send");
167 
/* Non-zero: udp_input() hands the mbuf to in_pcblookup_pkthash(). */
168 static int udp_reuseport_ext = 1;
169 SYSCTL_INT(_net_inet_udp, OID_AUTO, reuseport_ext, CTLFLAG_RW,
170 	&udp_reuseport_ext, 0, "SO_REUSEPORT extension");
171 
/* The UDP pcb table and its port table; both are set up by udp_init(). */
172 struct	inpcbinfo udbinfo;
173 struct	inpcbportinfo udbportinfo;
174 
/* udbinfo_br is created in udp_init(); users of both are outside this view. */
175 static struct netisr_barrier *udbinfo_br;
176 static struct lwkt_serialize udbinfo_slize = LWKT_SERIALIZE_INITIALIZER;
177 
/* Size passed to hashinit() for every UDP pcb hash table. */
178 #ifndef UDBHASHSIZE
179 #define UDBHASHSIZE 16
180 #endif
181 
/* One statistics block per CPU, exported raw by sysctl_udpstat(). */
182 struct	udpstat udpstat_percpu[MAXCPU] __cachealign;
183 
/*
 * Lazily-built IPv6 views of an IPv4 datagram, shared across the
 * udp_append() calls made for one packet.  The *_init_done bit records
 * whether the cached value has already been filled in.
 */
184 #ifdef INET6
185 struct udp_in6 {
186 	struct sockaddr_in6	uin6_sin;	/* v4-mapped source address */
187 	u_char			uin6_init_done : 1;
188 };
189 struct udp_ip6 {
190 	struct ip6_hdr		uip6_ip6;	/* built by ip_2_ip6_hdr() */
191 	u_char			uip6_init_done : 1;
192 };
193 #else
/* Incomplete types only: without INET6 callers pass NULL pointers. */
194 struct udp_in6;
195 struct udp_ip6;
196 #endif /* INET6 */
197 
/* Forward declarations for file-local helpers. */
198 static void udp_append (struct inpcb *last, struct ip *ip,
199     struct mbuf *n, int off, struct sockaddr_in *udp_in,
200     struct udp_in6 *, struct udp_ip6 *);
201 #ifdef INET6
202 static void ip_2_ip6_hdr (struct ip6_hdr *ip6, struct ip *ip);
203 #endif
204 
/* Defined beyond the portion of the file visible here. */
205 static int udp_connect_oncpu(struct socket *so, struct thread *td,
206 			struct sockaddr_in *sin, struct sockaddr_in *if_sin);
207 
208 void
209 udp_init(void)
210 {
211 	int cpu;
212 
213 	in_pcbinfo_init(&udbinfo);
214 	in_pcbportinfo_init(&udbportinfo, UDBHASHSIZE, FALSE, 0);
215 
216 	udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
217 	udbinfo.portinfo = &udbportinfo;
218 	udbinfo.wildcardhashbase = hashinit(UDBHASHSIZE, M_PCB,
219 					    &udbinfo.wildcardhashmask);
220 	udbinfo.localgrphashbase = hashinit(UDBHASHSIZE, M_PCB,
221 					    &udbinfo.localgrphashmask);
222 	udbinfo.ipi_size = sizeof(struct inpcb);
223 
224 	udbinfo_br = netisr_barrier_create();
225 
226 	/*
227 	 * Initialize UDP statistics counters for each CPU.
228 	 */
229 	for (cpu = 0; cpu < ncpus; ++cpu)
230 		bzero(&udpstat_percpu[cpu], sizeof(struct udpstat));
231 }
232 
233 static int
234 sysctl_udpstat(SYSCTL_HANDLER_ARGS)
235 {
236 	int cpu, error = 0;
237 
238 	for (cpu = 0; cpu < ncpus; ++cpu) {
239 		if ((error = SYSCTL_OUT(req, &udpstat_percpu[cpu],
240 					sizeof(struct udpstat))))
241 			break;
242 		if ((error = SYSCTL_IN(req, &udpstat_percpu[cpu],
243 				       sizeof(struct udpstat))))
244 			break;
245 	}
246 
247 	return (error);
248 }
249 SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, (CTLTYPE_OPAQUE | CTLFLAG_RW),
250     0, 0, sysctl_udpstat, "S,udpstat", "UDP statistics");
251 
252 /*
253  * Check multicast packets to make sure they are only sent to sockets with
254  * multicast memberships for the packet's destination address and arrival
255  * interface.  Multicast packets to multicast-unaware sockets are also
256  * disallowed.
257  *
258  * Returns 0 if the packet is acceptable, -1 if it is not.
259  */
260 static __inline int
261 check_multicast_membership(struct ip *ip, struct inpcb *inp, struct mbuf *m)
262 {
263 	int mshipno;
264 	struct ip_moptions *mopt;
265 
266 	if (strict_mcast_mship == 0 ||
267 	    !IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
268 		return (0);
269 	}
270 	mopt = inp->inp_moptions;
271 	if (mopt == NULL)
272 		return (-1);
273 	for (mshipno = 0; mshipno < mopt->imo_num_memberships; ++mshipno) {
274 		struct in_multi *maddr = mopt->imo_membership[mshipno];
275 
276 		if (ip->ip_dst.s_addr == maddr->inm_addr.s_addr &&
277 		    m->m_pkthdr.rcvif == maddr->inm_ifp) {
278 			return (0);
279 		}
280 	}
281 	return (-1);
282 }
283 
/*
 * Input routine for UDP over IPv4.
 *
 * Takes ownership of the mbuf chain in *mp (and clears *mp); *offp is the
 * IP header length.  The datagram is validated (destination port, length,
 * checksum) and then delivered either to every matching pcb (multicast or
 * broadcast destination) or to the single pcb found by hash lookup.  When
 * no pcb matches a unicast datagram an ICMP port-unreachable may be sent,
 * subject to the blackhole knob and, if configured, ICMP rate limiting.
 *
 * Always returns IPPROTO_DONE; the mbuf is consumed on every path.
 */
284 int
285 udp_input(struct mbuf **mp, int *offp, int proto)
286 {
287 	struct sockaddr_in udp_in = { sizeof udp_in, AF_INET };
288 #ifdef INET6
289 	struct udp_in6 udp_in6 = {
290 		{ sizeof udp_in6.uin6_sin, AF_INET6 }, 0
291 	};
292 	struct udp_ip6 udp_ip6;
293 #endif
294 
295 	int iphlen;
296 	struct ip *ip;
297 	struct udphdr *uh;
298 	struct inpcb *inp;
299 	struct mbuf *m;
300 	struct mbuf *opts = NULL;
301 	int len, off;
302 	struct ip save_ip;
303 	struct sockaddr *append_sa;
304 
305 	off = *offp;
306 	m = *mp;
307 	*mp = NULL;
308 
309 	iphlen = off;
310 	udp_stat.udps_ipackets++;
311 
312 	/*
313 	 * Strip IP options, if any; should skip this,
314 	 * make available to user, and use on returned packets,
315 	 * but we don't yet have a way to check the checksum
316 	 * with options still present.
317 	 */
318 	if (iphlen > sizeof(struct ip)) {
319 		ip_stripoptions(m);
320 		iphlen = sizeof(struct ip);
321 	}
322 
323 	/*
324 	 * IP and UDP headers are together in first mbuf.
325 	 * Already checked and pulled up in ip_demux().
326 	 */
327 	KASSERT(m->m_len >= iphlen + sizeof(struct udphdr),
328 	    ("UDP header not in one mbuf"));
329 
330 	ip = mtod(m, struct ip *);
331 	uh = (struct udphdr *)((caddr_t)ip + iphlen);
332 
333 	/* destination port of 0 is illegal, based on RFC768. */
334 	if (uh->uh_dport == 0)
335 		goto bad;
336 
337 	/*
338 	 * Make mbuf data length reflect UDP length.
339 	 * If not enough data to reflect UDP length, drop.
340 	 */
341 	len = ntohs((u_short)uh->uh_ulen);
342 	if (ip->ip_len != len) {
343 		if (len > ip->ip_len || len < sizeof(struct udphdr)) {
344 			udp_stat.udps_badlen++;
345 			goto bad;
346 		}
	/* Negative count: m_adj() trims the excess from the tail. */
347 		m_adj(m, len - ip->ip_len);
348 		/* ip->ip_len = len; */
349 	}
350 	/*
351 	 * Save a copy of the IP header in case we want restore it
352 	 * for sending an ICMP error message in response.
353 	 */
354 	save_ip = *ip;
355 
356 	/*
357 	 * Checksum extended UDP header and data.
358 	 */
359 	if (uh->uh_sum) {
360 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
361 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
362 				uh->uh_sum = m->m_pkthdr.csum_data;
363 			else
364 				uh->uh_sum = in_pseudo(ip->ip_src.s_addr,
365 				    ip->ip_dst.s_addr, htonl((u_short)len +
366 				    m->m_pkthdr.csum_data + IPPROTO_UDP));
			/* A valid sum is 0xffff here, so this yields zero. */
367 			uh->uh_sum ^= 0xffff;
368 		} else {
369 			char b[9];
370 
			/*
			 * Software checksum: save the first 9 bytes of the
			 * overlay header, zero them and store the UDP length
			 * so the IP header doubles as the pseudo-header, then
			 * put the saved bytes back afterwards.
			 */
371 			bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
372 			bzero(((struct ipovly *)ip)->ih_x1, 9);
373 			((struct ipovly *)ip)->ih_len = uh->uh_ulen;
374 			uh->uh_sum = in_cksum(m, len + sizeof(struct ip));
375 			bcopy(b, ((struct ipovly *)ip)->ih_x1, 9);
376 		}
		/* Either branch leaves zero in uh_sum for a good checksum. */
377 		if (uh->uh_sum) {
378 			udp_stat.udps_badsum++;
379 			m_freem(m);
380 			return(IPPROTO_DONE);
381 		}
382 	} else
383 		udp_stat.udps_nosum++;
384 
385 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
386 	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
387 		struct inpcb *last;
388 
389 		/*
390 		 * Deliver a multicast or broadcast datagram to *all* sockets
391 		 * for which the local and remote addresses and ports match
392 		 * those of the incoming datagram.  This allows more than
393 		 * one process to receive multi/broadcasts on the same port.
394 		 * (This really ought to be done for unicast datagrams as
395 		 * well, but that would cause problems with existing
396 		 * applications that open both address-specific sockets and
397 		 * a wildcard socket listening to the same port -- they would
398 		 * end up receiving duplicates of every unicast datagram.
399 		 * Those applications open the multiple sockets to overcome an
400 		 * inadequacy of the UDP socket interface, but for backwards
401 		 * compatibility we avoid the problem here rather than
402 		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
403 		 */
404 
405 		/*
406 		 * Construct sockaddr format source address.
407 		 */
408 		udp_in.sin_port = uh->uh_sport;
409 		udp_in.sin_addr = ip->ip_src;
410 		/*
411 		 * Locate pcb(s) for datagram.
412 		 * (Algorithm copied from raw_intr().)
413 		 */
414 		last = NULL;
415 #ifdef INET6
416 		udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0;
417 #endif
		/*
		 * Delivery runs one match behind: each time a new match is
		 * found, the previous one ("last") receives a copy.  The
		 * final match receives the original mbuf after the loop.
		 */
418 		LIST_FOREACH(inp, &udbinfo.pcblisthead, inp_list) {
419 			KKASSERT((inp->inp_flags & INP_PLACEMARKER) == 0);
420 #ifdef INET6
421 			if (!(inp->inp_vflag & INP_IPV4))
422 				continue;
423 #endif
424 			if (inp->inp_lport != uh->uh_dport)
425 				continue;
426 			if (inp->inp_laddr.s_addr != INADDR_ANY) {
427 				if (inp->inp_laddr.s_addr !=
428 				    ip->ip_dst.s_addr)
429 					continue;
430 			}
431 			if (inp->inp_faddr.s_addr != INADDR_ANY) {
432 				if (inp->inp_faddr.s_addr !=
433 				    ip->ip_src.s_addr ||
434 				    inp->inp_fport != uh->uh_sport)
435 					continue;
436 			}
437 
438 			if (check_multicast_membership(ip, inp, m) < 0)
439 				continue;
440 
441 			if (last != NULL) {
442 				struct mbuf *n;
443 
				/*
				 * The IPSEC and FAST_IPSEC checks below each
				 * end in a dangling "else" that guards the
				 * copy-and-append statement following them.
				 */
444 #ifdef IPSEC
445 				/* check AH/ESP integrity. */
446 				if (ipsec4_in_reject_so(m, last->inp_socket))
447 					ipsecstat.in_polvio++;
448 					/* do not inject data to pcb */
449 				else
450 #endif /*IPSEC*/
451 #ifdef FAST_IPSEC
452 				/* check AH/ESP integrity. */
453 				if (ipsec4_in_reject(m, last))
454 					;
455 				else
456 #endif /*FAST_IPSEC*/
457 				if ((n = m_copypacket(m, MB_DONTWAIT)) != NULL)
458 					udp_append(last, ip, n,
459 					    iphlen + sizeof(struct udphdr),
460 					    &udp_in,
461 #ifdef INET6
462 					    &udp_in6, &udp_ip6
463 #else
464 				            NULL, NULL
465 #endif
466 					    );
467 			}
468 			last = inp;
469 			/*
470 			 * Don't look for additional matches if this one does
471 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
472 			 * socket options set.  This heuristic avoids searching
473 			 * through all pcbs in the common case of a non-shared
474 			 * port.  It assumes that an application will never
475 			 * clear these options after setting them.
476 			 */
477 			if (!(last->inp_socket->so_options &
478 			    (SO_REUSEPORT | SO_REUSEADDR)))
479 				break;
480 		}
481 
482 		if (last == NULL) {
483 			/*
484 			 * No matching pcb found; discard datagram.
485 			 * (No need to send an ICMP Port Unreachable
486 			 * for a broadcast or multicast datagram.)
487 			 */
488 			udp_stat.udps_noportbcast++;
489 			goto bad;
490 		}
491 #ifdef IPSEC
492 		/* check AH/ESP integrity. */
493 		if (ipsec4_in_reject_so(m, last->inp_socket)) {
494 			ipsecstat.in_polvio++;
495 			goto bad;
496 		}
497 #endif /*IPSEC*/
498 #ifdef FAST_IPSEC
499 		/* check AH/ESP integrity. */
500 		if (ipsec4_in_reject(m, last))
501 			goto bad;
502 #endif /*FAST_IPSEC*/
		/* The final match consumes the original mbuf chain. */
503 		udp_append(last, ip, m, iphlen + sizeof(struct udphdr),
504 		    &udp_in,
505 #ifdef INET6
506 		    &udp_in6, &udp_ip6
507 #else
508 		    NULL, NULL
509 #endif
510 		    );
511 		return(IPPROTO_DONE);
512 	}
513 	/*
514 	 * Locate pcb for datagram.
515 	 */
516 	inp = in_pcblookup_pkthash(&udbinfo, ip->ip_src, uh->uh_sport,
517 	    ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif,
518 	    udp_reuseport_ext ? m : NULL);
519 	if (inp == NULL) {
520 		if (log_in_vain) {
521 			char buf[sizeof "aaa.bbb.ccc.ddd"];
522 
			/*
			 * Copy the first inet_ntoa() result before calling it
			 * again (presumably it reuses a static buffer).
			 */
523 			strcpy(buf, inet_ntoa(ip->ip_dst));
524 			log(LOG_INFO,
525 			    "Connection attempt to UDP %s:%d from %s:%d\n",
526 			    buf, ntohs(uh->uh_dport), inet_ntoa(ip->ip_src),
527 			    ntohs(uh->uh_sport));
528 		}
529 		udp_stat.udps_noport++;
530 		if (m->m_flags & (M_BCAST | M_MCAST)) {
531 			udp_stat.udps_noportbcast++;
532 			goto bad;
533 		}
534 		if (blackhole)
535 			goto bad;
536 #ifdef ICMP_BANDLIM
537 		if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
538 			goto bad;
539 #endif
		/*
		 * Restore the IP header saved before checksumming and add
		 * the header length back to ip_len, then hand the mbuf to
		 * icmp_error().
		 */
540 		*ip = save_ip;
541 		ip->ip_len += iphlen;
542 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
543 		return(IPPROTO_DONE);
544 	}
545 #ifdef IPSEC
546 	if (ipsec4_in_reject_so(m, inp->inp_socket)) {
547 		ipsecstat.in_polvio++;
548 		goto bad;
549 	}
550 #endif /*IPSEC*/
551 #ifdef FAST_IPSEC
552 	if (ipsec4_in_reject(m, inp))
553 		goto bad;
554 #endif /*FAST_IPSEC*/
555 	/*
556 	 * Check the minimum TTL for socket.
557 	 */
558 	if (ip->ip_ttl < inp->inp_ip_minttl)
559 		goto bad;
560 
561 	/*
562 	 * Construct sockaddr format source address.
563 	 * Stuff source address and datagram in user buffer.
564 	 */
565 	udp_in.sin_port = uh->uh_sport;
566 	udp_in.sin_addr = ip->ip_src;
567 	if ((inp->inp_flags & INP_CONTROLOPTS) ||
568 	    (inp->inp_socket->so_options & SO_TIMESTAMP)) {
569 #ifdef INET6
570 		if (inp->inp_vflag & INP_IPV6) {
571 			int savedflags;
572 
			/*
			 * Build an IPv6 header from the IPv4 one and mask
			 * INP_UNMAPPABLEOPTS for the duration of the call.
			 */
573 			ip_2_ip6_hdr(&udp_ip6.uip6_ip6, ip);
574 			savedflags = inp->inp_flags;
575 			inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
576 			ip6_savecontrol(inp, &opts, &udp_ip6.uip6_ip6, m);
577 			inp->inp_flags = savedflags;
578 		} else
579 #endif
580 		ip_savecontrol(inp, &opts, ip, m);
581 	}
	/* Strip the IP and UDP headers, leaving only the payload. */
582 	m_adj(m, iphlen + sizeof(struct udphdr));
583 #ifdef INET6
584 	if (inp->inp_vflag & INP_IPV6) {
585 		in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin);
586 		append_sa = (struct sockaddr *)&udp_in6;
587 	} else
588 #endif
589 		append_sa = (struct sockaddr *)&udp_in;
590 
591 	lwkt_gettoken(&inp->inp_socket->so_rcv.ssb_token);
592 	if (ssb_appendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts) == 0) {
593 		udp_stat.udps_fullsock++;
594 		lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
595 		goto bad;
596 	}
597 	lwkt_reltoken(&inp->inp_socket->so_rcv.ssb_token);
598 	sorwakeup(inp->inp_socket);
599 	return(IPPROTO_DONE);
	/* Common drop path: free the datagram and any control mbuf. */
600 bad:
601 	m_freem(m);
602 	if (opts)
603 		m_freem(opts);
604 	return(IPPROTO_DONE);
605 }
606 
#ifdef INET6
/*
 * Fill in *ip6 from the IPv4 header *ip.  The payload length, next
 * header and hop limit fields take ip_len, ip_p and ip_ttl as-is, and
 * both addresses become ::ffff:a.b.c.d style mapped addresses (word 2 is
 * IPV6_ADDR_INT32_SMP, word 3 is the IPv4 address).  All other fields
 * are zero.
 */
static void
ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip)
{
	bzero(ip6, sizeof(*ip6));

	ip6->ip6_vfc = IPV6_VERSION;
	ip6->ip6_plen = ip->ip_len;
	ip6->ip6_nxt = ip->ip_p;
	ip6->ip6_hlim = ip->ip_ttl;

	ip6->ip6_src.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
	ip6->ip6_src.s6_addr32[3] = ip->ip_src.s_addr;

	ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
	ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr;
}
#endif
623 
624 /*
625  * subroutine of udp_input(), mainly for source code readability.
626  * caller must properly init udp_ip6 and udp_in6 beforehand.
627  */
628 static void
629 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off,
630     struct sockaddr_in *udp_in,
631     struct udp_in6 *udp_in6, struct udp_ip6 *udp_ip6)
632 {
633 	struct sockaddr *append_sa;
634 	struct mbuf *opts = NULL;
635 
636 	if (last->inp_flags & INP_CONTROLOPTS ||
637 	    last->inp_socket->so_options & SO_TIMESTAMP) {
638 #ifdef INET6
639 		if (last->inp_vflag & INP_IPV6) {
640 			int savedflags;
641 
642 			if (udp_ip6->uip6_init_done == 0) {
643 				ip_2_ip6_hdr(&udp_ip6->uip6_ip6, ip);
644 				udp_ip6->uip6_init_done = 1;
645 			}
646 			savedflags = last->inp_flags;
647 			last->inp_flags &= ~INP_UNMAPPABLEOPTS;
648 			ip6_savecontrol(last, &opts, &udp_ip6->uip6_ip6, n);
649 			last->inp_flags = savedflags;
650 		} else
651 #endif
652 		ip_savecontrol(last, &opts, ip, n);
653 	}
654 #ifdef INET6
655 	if (last->inp_vflag & INP_IPV6) {
656 		if (udp_in6->uin6_init_done == 0) {
657 			in6_sin_2_v4mapsin6(udp_in, &udp_in6->uin6_sin);
658 			udp_in6->uin6_init_done = 1;
659 		}
660 		append_sa = (struct sockaddr *)&udp_in6->uin6_sin;
661 	} else
662 #endif
663 		append_sa = (struct sockaddr *)udp_in;
664 	m_adj(n, off);
665 	lwkt_gettoken(&last->inp_socket->so_rcv.ssb_token);
666 	if (ssb_appendaddr(&last->inp_socket->so_rcv, append_sa, n, opts) == 0) {
667 		m_freem(n);
668 		if (opts)
669 			m_freem(opts);
670 		udp_stat.udps_fullsock++;
671 	} else {
672 		sorwakeup(last->inp_socket);
673 	}
674 	lwkt_reltoken(&last->inp_socket->so_rcv.ssb_token);
675 }
676 
677 /*
678  * Notify a udp user of an asynchronous error;
679  * just wake up so that he can collect error status.
680  */
681 void
682 udp_notify(struct inpcb *inp, int error)
683 {
684 	inp->inp_socket->so_error = error;
685 	sorwakeup(inp->inp_socket);
686 	sowwakeup(inp->inp_socket);
687 }
688 
/*
 * Message sent by udp_ctlinput() and handled by udp_notifyall_oncpu(); it
 * carries the arguments for one in_pcbnotifyall() call.
 */
689 struct netmsg_udp_notify {
690 	struct netmsg_base base;
691 	void		(*nm_notify)(struct inpcb *, int);	/* per-pcb callback */
692 	struct in_addr	nm_faddr;	/* foreign address to match */
693 	int		nm_arg;		/* error value passed to nm_notify */
694 };
695 
696 static void
697 udp_notifyall_oncpu(netmsg_t msg)
698 {
699 	struct netmsg_udp_notify *nm = (struct netmsg_udp_notify *)msg;
700 #if 0
701 	int nextcpu;
702 #endif
703 
704 	in_pcbnotifyall(&udbinfo.pcblisthead, nm->nm_faddr,
705 			nm->nm_arg, nm->nm_notify);
706 	lwkt_replymsg(&nm->base.lmsg, 0);
707 
708 #if 0
709 	/* XXX currently udp only runs on cpu 0 */
710 	nextcpu = mycpuid + 1;
711 	if (nextcpu < ncpus2)
712 		lwkt_forwardmsg(netisr_cpuport(nextcpu), &nm->base.lmsg);
713 	else
714 		lwkt_replymsg(&nmsg->base.lmsg, 0);
715 #endif
716 }
717 
718 static void
719 udp_rtchange(struct inpcb *inp, int err)
720 {
721 	/* XXX Nuke this, once UDP inpcbs are CPU localized */
722 	if (inp->inp_route.ro_rt && inp->inp_route.ro_rt->rt_cpuid == mycpuid) {
723 		rtfree(inp->inp_route.ro_rt);
724 		inp->inp_route.ro_rt = NULL;
725 		/*
726 		 * A new route can be allocated the next time
727 		 * output is attempted.
728 		 */
729 	}
730 }
731 
732 void
733 udp_ctlinput(netmsg_t msg)
734 {
735 	struct sockaddr *sa = msg->ctlinput.nm_arg;
736 	struct ip *ip = msg->ctlinput.nm_extra;
737 	int cmd = msg->ctlinput.nm_cmd;
738 	struct udphdr *uh;
739 	void (*notify) (struct inpcb *, int) = udp_notify;
740 	struct in_addr faddr;
741 	struct inpcb *inp;
742 
743 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
744 
745 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
746 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
747 		goto done;
748 
749 	if (PRC_IS_REDIRECT(cmd)) {
750 		ip = NULL;
751 		notify = udp_rtchange;
752 	} else if (cmd == PRC_HOSTDEAD) {
753 		ip = NULL;
754 	} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
755 		goto done;
756 	}
757 
758 	if (ip) {
759 		uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
760 		inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
761 					ip->ip_src, uh->uh_sport, 0, NULL);
762 		if (inp != NULL && inp->inp_socket != NULL)
763 			(*notify)(inp, inetctlerrmap[cmd]);
764 	} else if (PRC_IS_REDIRECT(cmd)) {
765 		struct netmsg_udp_notify *nm;
766 
767 		KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
768 		nm = kmalloc(sizeof(*nm), M_LWKTMSG, M_INTWAIT);
769 		netmsg_init(&nm->base, NULL, &netisr_afree_rport,
770 			    0, udp_notifyall_oncpu);
771 		nm->nm_faddr = faddr;
772 		nm->nm_arg = inetctlerrmap[cmd];
773 		nm->nm_notify = notify;
774 		lwkt_sendmsg(netisr_cpuport(0), &nm->base.lmsg);
775 	} else {
776 		/*
777 		 * XXX We should forward msg upon PRC_HOSTHEAD and ip == NULL,
778 		 * once UDP inpcbs are CPU localized
779 		 */
780 		KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
781 		in_pcbnotifyall(&udbinfo.pcblisthead, faddr, inetctlerrmap[cmd],
782 				notify);
783 	}
784 done:
785 	lwkt_replymsg(&msg->lmsg, 0);
786 }
787 
788 static int
789 udp_pcblist(SYSCTL_HANDLER_ARGS)
790 {
791 	struct xinpcb *xi;
792 	int error, nxi, i;
793 
794 	udbinfo_lock();
795 	error = in_pcblist_global_nomarker(oidp, arg1, arg2, req, &xi, &nxi);
796 	udbinfo_unlock();
797 
798 	if (error) {
799 		KKASSERT(xi == NULL);
800 		return error;
801 	}
802 	if (nxi == 0) {
803 		KKASSERT(xi == NULL);
804 		return 0;
805 	}
806 
807 	for (i = 0; i < nxi; ++i) {
808 		error = SYSCTL_OUT(req, &xi[i], sizeof(xi[i]));
809 		if (error)
810 			break;
811 	}
812 	kfree(xi, M_TEMP);
813 
814 	return error;
815 }
816 SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, &udbinfo, 0,
817 	    udp_pcblist, "S,xinpcb", "List of active UDP sockets");
818 
819 static int
820 udp_getcred(SYSCTL_HANDLER_ARGS)
821 {
822 	struct sockaddr_in addrs[2];
823 	struct ucred cred0, *cred = NULL;
824 	struct inpcb *inp;
825 	int error;
826 
827 	error = priv_check(req->td, PRIV_ROOT);
828 	if (error)
829 		return (error);
830 	error = SYSCTL_IN(req, addrs, sizeof addrs);
831 	if (error)
832 		return (error);
833 
834 	udbinfo_lock();
835 	inp = in_pcblookup_hash(&udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
836 				addrs[0].sin_addr, addrs[0].sin_port, 1, NULL);
837 	if (inp == NULL || inp->inp_socket == NULL) {
838 		error = ENOENT;
839 	} else {
840 		if (inp->inp_socket->so_cred != NULL) {
841 			cred0 = *(inp->inp_socket->so_cred);
842 			cred = &cred0;
843 		}
844 	}
845 	udbinfo_unlock();
846 
847 	if (error)
848 		return error;
849 
850 	return SYSCTL_OUT(req, cred, sizeof(struct ucred));
851 }
852 
853 SYSCTL_PROC(_net_inet_udp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
854     0, 0, udp_getcred, "S,ucred", "Get the ucred of a UDP connection");
855 
856 static void
857 udp_send(netmsg_t msg)
858 {
859 	struct socket *so = msg->send.base.nm_so;
860 	struct mbuf *m = msg->send.nm_m;
861 	struct sockaddr *dstaddr = msg->send.nm_addr;
862 	int pru_flags = msg->send.nm_flags;
863 	struct inpcb *inp = so->so_pcb;
864 	struct thread *td = msg->send.nm_td;
865 	int flags;
866 
867 	struct udpiphdr *ui;
868 	int len = m->m_pkthdr.len;
869 	struct sockaddr_in *sin;	/* really is initialized before use */
870 	int error = 0;
871 
872 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
873 	KKASSERT(msg->send.nm_control == NULL);
874 
875 	logudp(output_beg, inp);
876 
877 	if (inp == NULL) {
878 		error = EINVAL;
879 		goto release;
880 	}
881 
882 	if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
883 		error = EMSGSIZE;
884 		goto release;
885 	}
886 
887 	if (inp->inp_lport == 0) {	/* unbound socket */
888 		error = in_pcbbind(inp, NULL, td);
889 		if (error)
890 			goto release;
891 
892 		udbinfo_barrier_set();
893 		in_pcbinswildcardhash(inp);
894 		udbinfo_barrier_rem();
895 	}
896 
897 	if (dstaddr != NULL) {		/* destination address specified */
898 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
899 			/* already connected */
900 			error = EISCONN;
901 			goto release;
902 		}
903 		sin = (struct sockaddr_in *)dstaddr;
904 		if (!prison_remote_ip(td, (struct sockaddr *)&sin)) {
905 			error = EAFNOSUPPORT; /* IPv6 only jail */
906 			goto release;
907 		}
908 	} else {
909 		if (inp->inp_faddr.s_addr == INADDR_ANY) {
910 			/* no destination specified and not already connected */
911 			error = ENOTCONN;
912 			goto release;
913 		}
914 		sin = NULL;
915 	}
916 
917 	/*
918 	 * Calculate data length and get a mbuf
919 	 * for UDP and IP headers.
920 	 */
921 	M_PREPEND(m, sizeof(struct udpiphdr), MB_DONTWAIT);
922 	if (m == NULL) {
923 		error = ENOBUFS;
924 		goto release;
925 	}
926 
927 	/*
928 	 * Fill in mbuf with extended UDP header
929 	 * and addresses and length put into network format.
930 	 */
931 	ui = mtod(m, struct udpiphdr *);
932 	bzero(ui->ui_x1, sizeof ui->ui_x1);	/* XXX still needed? */
933 	ui->ui_pr = IPPROTO_UDP;
934 
935 	/*
936 	 * Set destination address.
937 	 */
938 	if (dstaddr != NULL) {			/* use specified destination */
939 		ui->ui_dst = sin->sin_addr;
940 		ui->ui_dport = sin->sin_port;
941 	} else {				/* use connected destination */
942 		ui->ui_dst = inp->inp_faddr;
943 		ui->ui_dport = inp->inp_fport;
944 	}
945 
946 	/*
947 	 * Set source address.
948 	 */
949 	if (inp->inp_laddr.s_addr == INADDR_ANY ||
950 	    IN_MULTICAST(ntohl(inp->inp_laddr.s_addr))) {
951 		struct sockaddr_in *if_sin;
952 
953 		if (dstaddr == NULL) {
954 			/*
955 			 * connect() had (or should have) failed because
956 			 * the interface had no IP address, but the
957 			 * application proceeded to call send() anyways.
958 			 */
959 			error = ENOTCONN;
960 			goto release;
961 		}
962 
963 		/* Look up outgoing interface. */
964 		error = in_pcbladdr_find(inp, dstaddr, &if_sin, td, 1);
965 		if (error)
966 			goto release;
967 		ui->ui_src = if_sin->sin_addr;	/* use address of interface */
968 	} else {
969 		ui->ui_src = inp->inp_laddr;	/* use non-null bound address */
970 	}
971 	ui->ui_sport = inp->inp_lport;
972 	KASSERT(inp->inp_lport != 0, ("inp lport should have been bound"));
973 
974 	/*
975 	 * Release the original thread, since it is no longer used
976 	 */
977 	if (pru_flags & PRUS_HELDTD) {
978 		lwkt_rele(td);
979 		pru_flags &= ~PRUS_HELDTD;
980 	}
981 	/*
982 	 * Free the dest address, since it is no longer needed
983 	 */
984 	if (pru_flags & PRUS_FREEADDR) {
985 		kfree(dstaddr, M_SONAME);
986 		pru_flags &= ~PRUS_FREEADDR;
987 	}
988 
989 	ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr));
990 
991 	/*
992 	 * Set up checksum and output datagram.
993 	 */
994 	if (udpcksum) {
995 		ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr,
996 		    htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
997 		m->m_pkthdr.csum_flags = CSUM_UDP;
998 		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
999 		m->m_pkthdr.csum_thlen = sizeof(struct udphdr);
1000 	} else {
1001 		ui->ui_sum = 0;
1002 	}
1003 	((struct ip *)ui)->ip_len = sizeof(struct udpiphdr) + len;
1004 	((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl;	/* XXX */
1005 	((struct ip *)ui)->ip_tos = inp->inp_ip_tos;	/* XXX */
1006 	udp_stat.udps_opackets++;
1007 
1008 	flags = IP_DEBUGROUTE |
1009 	    (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST));
1010 	if (pru_flags & PRUS_DONTROUTE)
1011 		flags |= SO_DONTROUTE;
1012 
1013 	logudp(ip_output, inp);
1014 	error = ip_output(m, inp->inp_options, &inp->inp_route, flags,
1015 	    inp->inp_moptions, inp);
1016 	m = NULL;
1017 
1018 release:
1019 	if (m != NULL)
1020 		m_freem(m);
1021 
1022 	if (pru_flags & PRUS_HELDTD)
1023 		lwkt_rele(td);
1024 	if (pru_flags & PRUS_FREEADDR)
1025 		kfree(dstaddr, M_SONAME);
1026 	if ((pru_flags & PRUS_NOREPLY) == 0)
1027 		lwkt_replymsg(&msg->send.base.lmsg, error);
1028 
1029 	logudp(output_end, inp);
1030 }
1031 
/*
 * Default send and receive buffer reservations; udp_attach() passes both
 * to soreserve().
 * NOTE(review): both variables are u_long but are exported with
 * SYSCTL_INT -- confirm the width mismatch is intentional.
 */
1032 u_long	udp_sendspace = 9216;		/* really max datagram size */
1033 					/* (the "40 1K datagrams" sizing is udp_recvspace's) */
1034 SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
1035     &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
1036 
/* Room for 40 datagrams of 1K payload plus one source sockaddr each. */
1037 u_long	udp_recvspace = 40 * (1024 +
1038 #ifdef INET6
1039 				      sizeof(struct sockaddr_in6)
1040 #else
1041 				      sizeof(struct sockaddr_in)
1042 #endif
1043 				      );
1044 SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
1045     &udp_recvspace, 0, "Maximum incoming UDP datagram size");
1046 
1047 /*
1048  * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
1049  *	 will sofree() it when we return.
1050  */
1051 static void
1052 udp_abort(netmsg_t msg)
1053 {
1054 	struct socket *so = msg->abort.base.nm_so;
1055 	struct inpcb *inp;
1056 	int error;
1057 
1058 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1059 
1060 	inp = so->so_pcb;
1061 	if (inp) {
1062 		soisdisconnected(so);
1063 
1064 		udbinfo_barrier_set();
1065 		in_pcbdetach(inp);
1066 		udbinfo_barrier_rem();
1067 		error = 0;
1068 	} else {
1069 		error = EINVAL;
1070 	}
1071 	lwkt_replymsg(&msg->abort.base.lmsg, error);
1072 }
1073 
1074 static void
1075 udp_attach(netmsg_t msg)
1076 {
1077 	struct socket *so = msg->attach.base.nm_so;
1078 	struct pru_attach_info *ai = msg->attach.nm_ai;
1079 	struct inpcb *inp;
1080 	int error;
1081 
1082 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1083 
1084 	inp = so->so_pcb;
1085 	if (inp != NULL) {
1086 		error = EINVAL;
1087 		goto out;
1088 	}
1089 	error = soreserve(so, udp_sendspace, udp_recvspace, ai->sb_rlimit);
1090 	if (error)
1091 		goto out;
1092 
1093 	udbinfo_barrier_set();
1094 	error = in_pcballoc(so, &udbinfo);
1095 	udbinfo_barrier_rem();
1096 
1097 	if (error)
1098 		goto out;
1099 
1100 	/*
1101 	 * Set default port for protocol processing prior to bind/connect.
1102 	 */
1103 	sosetport(so, netisr_cpuport(0));
1104 
1105 	inp = (struct inpcb *)so->so_pcb;
1106 	inp->inp_vflag |= INP_IPV4;
1107 	inp->inp_ip_ttl = ip_defttl;
1108 	error = 0;
1109 out:
1110 	lwkt_replymsg(&msg->attach.base.lmsg, error);
1111 }
1112 
1113 static void
1114 udp_bind(netmsg_t msg)
1115 {
1116 	struct socket *so = msg->bind.base.nm_so;
1117 	struct sockaddr *nam = msg->bind.nm_nam;
1118 	struct thread *td = msg->bind.nm_td;
1119 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
1120 	struct inpcb *inp;
1121 	int error;
1122 
1123 	inp = so->so_pcb;
1124 	if (inp) {
1125 		error = in_pcbbind(inp, nam, td);
1126 		if (error == 0) {
1127 			if (sin->sin_addr.s_addr != INADDR_ANY)
1128 				inp->inp_flags |= INP_WASBOUND_NOTANY;
1129 
1130 			udbinfo_barrier_set();
1131 			in_pcbinswildcardhash(inp);
1132 			udbinfo_barrier_rem();
1133 		}
1134 	} else {
1135 		error = EINVAL;
1136 	}
1137 	lwkt_replymsg(&msg->bind.base.lmsg, error);
1138 }
1139 
/*
 * Connect request: associate the socket with the foreign address
 * carried in the message.
 *
 * The checks run in this order, and the first one that fails supplies
 * the reply:
 *   - no pcb on the socket			-> EINVAL
 *   - PRUC_RECONNECT set in the message	-> panic (unsupported)
 *   - foreign address already set		-> EISCONN
 *   - implicit bind for a prison credential	-> in_pcbbind() error
 *   - local address selection			-> in_pcbladdr() error
 *   - prison_remote_ip() rejects the target	-> EAFNOSUPPORT
 *
 * If udp_addrport() then names a message port other than the current
 * thread's, the function panics (the forwarding code is compiled only
 * under "notyet").  Otherwise the connect is completed by
 * udp_connect_oncpu() and its result is the reply.
 */
static void
udp_connect(netmsg_t msg)
{
	struct socket *so = msg->connect.base.nm_so;
	struct sockaddr *nam = msg->connect.nm_nam;
	struct thread *td = msg->connect.nm_td;
	struct inpcb *inp;
	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
	struct sockaddr_in *if_sin;
	lwkt_port_t port;
	int error;

	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));

	inp = so->so_pcb;
	if (inp == NULL) {
		error = EINVAL;
		goto out;
	}

	/*
	 * A re-dispatched connect is not implemented; the statements after
	 * the panic are only compiled when "notyet" is defined.
	 */
	if (msg->connect.nm_flags & PRUC_RECONNECT) {
		panic("UDP does not support RECONNECT");
#ifdef notyet
		msg->connect.nm_flags &= ~PRUC_RECONNECT;
		in_pcblink(inp, &udbinfo);
#endif
	}

	/* A non-zero foreign address means the socket is already connected. */
	if (inp->inp_faddr.s_addr != INADDR_ANY) {
		error = EISCONN;
		goto out;
	}
	error = 0;

	/*
	 * Bind if we have to: only done when the requesting thread has a
	 * process whose credential carries a prison and no local address
	 * has been chosen yet.
	 */
	if (td->td_proc && td->td_proc->p_ucred->cr_prison != NULL &&
	    inp->inp_laddr.s_addr == INADDR_ANY) {
		error = in_pcbbind(inp, NULL, td);
		if (error)
			goto out;
	}

	/*
	 * Calculate the correct protocol processing thread.  The connect
	 * operation must run there.
	 */
	error = in_pcbladdr(inp, nam, &if_sin, td);
	if (error)
		goto out;
	if (!prison_remote_ip(td, nam)) {
		error = EAFNOSUPPORT; /* IPv6 only jail */
		goto out;
	}

	port = udp_addrport(sin->sin_addr.s_addr, sin->sin_port,
			    inp->inp_laddr.s_addr, inp->inp_lport);
	if (port != &curthread->td_msgport) {
#ifdef notyet
		struct route *ro = &inp->inp_route;

		/*
		 * in_pcbladdr() may have allocated a route entry for us
		 * on the current CPU, but we need a route entry on the
		 * inpcb's owner CPU, so free it here.
		 */
		if (ro->ro_rt != NULL)
			RTFREE(ro->ro_rt);
		bzero(ro, sizeof(*ro));

		/*
		 * We are moving the protocol processing port the socket
		 * is on, we have to unlink here and re-link on the
		 * target cpu.
		 */
		in_pcbunlink(so->so_pcb, &udbinfo);
		/* in_pcbunlink(so->so_pcb, &udbinfo[mycpu->gd_cpuid]); */
		sosetport(so, port);
		msg->connect.nm_flags |= PRUC_RECONNECT;
		msg->connect.base.nm_dispatch = udp_connect;

		lwkt_forwardmsg(port, &msg->connect.base.lmsg);
		/* msg invalid now */
		return;
#else
		panic("UDP activity should only be in netisr0");
#endif
	}
	KKASSERT(port == &curthread->td_msgport);
	error = udp_connect_oncpu(so, td, sin, if_sin);
out:
	/* Every reply path asserts that the message carries no mbuf. */
	KKASSERT(msg->connect.nm_m == NULL);
	lwkt_replymsg(&msg->connect.base.lmsg, error);
}
1235 
1236 static int
1237 udp_connect_oncpu(struct socket *so, struct thread *td,
1238 		  struct sockaddr_in *sin, struct sockaddr_in *if_sin)
1239 {
1240 	struct inpcb *inp;
1241 	int error;
1242 
1243 	udbinfo_barrier_set();
1244 
1245 	inp = so->so_pcb;
1246 	if (inp->inp_flags & INP_WILDCARD)
1247 		in_pcbremwildcardhash(inp);
1248 	error = in_pcbconnect(inp, (struct sockaddr *)sin, td);
1249 
1250 	if (error == 0) {
1251 		/*
1252 		 * No more errors can occur, finish adjusting the socket
1253 		 * and change the processing port to reflect the connected
1254 		 * socket.  Once set we can no longer safely mess with the
1255 		 * socket.
1256 		 */
1257 		soisconnected(so);
1258 	} else if (error == EAFNOSUPPORT) {	/* connection dissolved */
1259 		/*
1260 		 * Follow traditional BSD behavior and retain
1261 		 * the local port binding.  But, fix the old misbehavior
1262 		 * of overwriting any previously bound local address.
1263 		 */
1264 		if (!(inp->inp_flags & INP_WASBOUND_NOTANY))
1265 			inp->inp_laddr.s_addr = INADDR_ANY;
1266 		in_pcbinswildcardhash(inp);
1267 	}
1268 
1269 	udbinfo_barrier_rem();
1270 	return error;
1271 }
1272 
1273 static void
1274 udp_detach(netmsg_t msg)
1275 {
1276 	struct socket *so = msg->detach.base.nm_so;
1277 	struct inpcb *inp;
1278 	int error;
1279 
1280 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1281 
1282 	inp = so->so_pcb;
1283 	if (inp) {
1284 		udbinfo_barrier_set();
1285 		in_pcbdetach(inp);
1286 		udbinfo_barrier_rem();
1287 		error = 0;
1288 	} else {
1289 		error = EINVAL;
1290 	}
1291 	lwkt_replymsg(&msg->detach.base.lmsg, error);
1292 }
1293 
1294 static void
1295 udp_disconnect(netmsg_t msg)
1296 {
1297 	struct socket *so = msg->disconnect.base.nm_so;
1298 	struct route *ro;
1299 	struct inpcb *inp;
1300 	int error;
1301 
1302 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1303 
1304 	inp = so->so_pcb;
1305 	if (inp == NULL) {
1306 		error = EINVAL;
1307 		goto out;
1308 	}
1309 	if (inp->inp_faddr.s_addr == INADDR_ANY) {
1310 		error = ENOTCONN;
1311 		goto out;
1312 	}
1313 
1314 	soreference(so);
1315 
1316 	udbinfo_barrier_set();
1317 	in_pcbdisconnect(inp);
1318 	udbinfo_barrier_rem();
1319 
1320 	soclrstate(so, SS_ISCONNECTED);		/* XXX */
1321 	sofree(so);
1322 
1323 	ro = &inp->inp_route;
1324 	if (ro->ro_rt != NULL)
1325 		RTFREE(ro->ro_rt);
1326 	bzero(ro, sizeof(*ro));
1327 	error = 0;
1328 out:
1329 	lwkt_replymsg(&msg->disconnect.base.lmsg, error);
1330 }
1331 
1332 void
1333 udp_shutdown(netmsg_t msg)
1334 {
1335 	struct socket *so = msg->shutdown.base.nm_so;
1336 	struct inpcb *inp;
1337 	int error;
1338 
1339 	KKASSERT(&curthread->td_msgport == netisr_cpuport(0));
1340 
1341 	inp = so->so_pcb;
1342 	if (inp) {
1343 		socantsendmore(so);
1344 		error = 0;
1345 	} else {
1346 		error = EINVAL;
1347 	}
1348 	lwkt_replymsg(&msg->shutdown.base.lmsg, error);
1349 }
1350 
/*
 * Enter the udbinfo serializer (udbinfo_slize).  Paired with
 * udbinfo_unlock().
 */
void
udbinfo_lock(void)
{
	lwkt_serialize_enter(&udbinfo_slize);
}
1356 
/*
 * Exit the udbinfo serializer (udbinfo_slize) entered by udbinfo_lock().
 */
void
udbinfo_unlock(void)
{
	lwkt_serialize_exit(&udbinfo_slize);
}
1362 
/*
 * Set the netisr barrier udbinfo_br and then take the udbinfo lock.
 * udbinfo_barrier_rem() undoes both steps in the opposite order.
 */
void
udbinfo_barrier_set(void)
{
	netisr_barrier_set(udbinfo_br);
	udbinfo_lock();
}
1369 
/*
 * Release the udbinfo lock and then remove the netisr barrier
 * udbinfo_br, i.e. the reverse of udbinfo_barrier_set().
 */
void
udbinfo_barrier_rem(void)
{
	udbinfo_unlock();
	netisr_barrier_rem(udbinfo_br);
}
1376 
/*
 * User-request dispatch table for UDP sockets.  Requests that UDP does
 * not implement (accept, connect2, listen, rcvd, rcvoob) are routed to
 * pr_generic_notsupp; address and control requests use the shared
 * in_*_dispatch handlers.
 */
struct pr_usrreqs udp_usrreqs = {
	.pru_abort = udp_abort,
	.pru_accept = pr_generic_notsupp,
	.pru_attach = udp_attach,
	.pru_bind = udp_bind,
	.pru_connect = udp_connect,
	.pru_connect2 = pr_generic_notsupp,
	.pru_control = in_control_dispatch,
	.pru_detach = udp_detach,
	.pru_disconnect = udp_disconnect,
	.pru_listen = pr_generic_notsupp,
	.pru_peeraddr = in_setpeeraddr_dispatch,
	.pru_rcvd = pr_generic_notsupp,
	.pru_rcvoob = pr_generic_notsupp,
	.pru_send = udp_send,
	.pru_sense = pru_sense_null,
	.pru_shutdown = udp_shutdown,
	.pru_sockaddr = in_setsockaddr_dispatch,
	.pru_sosend = sosendudp,
	.pru_soreceive = soreceive
};
1398 
1399