xref: /minix/minix/net/lwip/udpsock.c (revision 9f81acbc)
1 /* LWIP service - udpsock.c - UDP sockets */
2 
3 #include "lwip.h"
4 #include "ifaddr.h"
5 #include "pktsock.h"
6 
7 #include "lwip/udp.h"
8 
9 #include <netinet/udp.h>
10 #include <netinet/ip_var.h>
11 #include <netinet/udp_var.h>
12 
/*
 * The number of UDP sockets.  Inherited from the lwIP configuration, so that
 * the socket array below has one slot per configured lwIP UDP PCB.
 */
#define NR_UDPSOCK	MEMP_NUM_UDP_PCB

/*
 * Outgoing packets are not buffered, so the send buffer size simply
 * determines the maximum size for sent packets.  The send buffer maximum is
 * therefore limited to the maximum size of a single packet (64K-1 bytes),
 * which is already enforced by lwIP's 16-bit length parameter to pbuf_alloc().
 *
 * The actual transmission may enforce a lower limit, though.  The full packet
 * size must not exceed the same 64K-1 limit, and that includes any headers
 * that still have to be prepended to the given packet.  The size of those
 * headers depends on the socket type (IPv4/IPv6) and the IP_HDRINCL setting.
 */
#define UDP_MAX_PAYLOAD	(UINT16_MAX)

/*
 * Limits and defaults for the per-socket send and receive buffer sizes.  The
 * defaults are used when a socket is created and are also exported through the
 * sendspace/recvspace sysctl nodes; the minimum and maximum values bound what
 * may be configured on a socket afterwards.
 */
#define UDP_SNDBUF_MIN	1		/* minimum UDP send buffer size */
#define UDP_SNDBUF_DEF	8192		/* default UDP send buffer size */
#define UDP_SNDBUF_MAX	UDP_MAX_PAYLOAD	/* maximum UDP send buffer size */
#define UDP_RCVBUF_MIN	MEMPOOL_BUFSIZE	/* minimum UDP receive buffer size */
#define UDP_RCVBUF_DEF	32768		/* default UDP receive buffer size */
#define UDP_RCVBUF_MAX	65536		/* maximum UDP receive buffer size */
35 
/*
 * Per-socket state of a UDP socket.  All UDP sockets live in the statically
 * allocated 'udp_array'.  Sockets that are not in use are linked together on
 * 'udp_freelist' through their 'udp_next' field.
 *
 * The socket callbacks in this file cast the 'struct sock' pointer they are
 * given straight to a 'struct udpsock' pointer, which is why the embedded
 * packet socket object has to be the very first member.  The 'udp_pcb' field
 * is set when the socket is created and reset to NULL when it is closed.
 */
static struct udpsock {
	struct pktsock udp_pktsock;		/* pkt socket, MUST be first */
	struct udp_pcb *udp_pcb;		/* lwIP UDP control block */
	SIMPLEQ_ENTRY(udpsock) udp_next;	/* next in free list */
} udp_array[NR_UDPSOCK];

static SIMPLEQ_HEAD(, udpsock) udp_freelist;	/* list of free UDP sockets */
43 
/* The table of socket callbacks for UDP sockets; defined further below. */
static const struct sockevent_ops udpsock_ops;

/*
 * Accessor macros.  The first two step from a UDP socket object to the more
 * generic socket objects embedded in it, by way of the pktsock and ipsock
 * accessors.  The last two test a property of the socket: whether it is an
 * IPv6 socket (as reported by ipsock), and whether its lwIP PCB currently has
 * the 'connected' flag set.
 */
#define udpsock_get_sock(udp)	(ipsock_get_sock(udpsock_get_ipsock(udp)))
#define udpsock_get_ipsock(udp)	(pktsock_get_ipsock(&(udp)->udp_pktsock))
#define udpsock_is_ipv6(udp)	(ipsock_is_ipv6(udpsock_get_ipsock(udp)))
#define udpsock_is_conn(udp)	\
	(udp_flags((udp)->udp_pcb) & UDP_FLAGS_CONNECTED)

/* Forward declaration; needed by the sysctl node table right below. */
static ssize_t udpsock_pcblist(struct rmib_call *, struct rmib_node *,
	struct rmib_oldp *, struct rmib_newp *);
54 
/*
 * The CTL_NET {PF_INET,PF_INET6} IPPROTO_UDP subtree.  The table is indexed by
 * the UDPCTL_ identifiers.  The checksum, sendspace, and recvspace nodes are
 * read-only integers with fixed values; the loopback checksum node is
 * read-write and backed by loopif_cksum(); the PCB list node is generated on
 * demand by udpsock_pcblist().
 */
/* TODO: add many more and make some of them writable.. */
static struct rmib_node net_inet_udp_table[] = {
/* 1*/	[UDPCTL_CHECKSUM]	= RMIB_INT(RMIB_RO, 1, "checksum",
				    "Compute UDP checksums"),
/* 2*/	[UDPCTL_SENDSPACE]	= RMIB_INT(RMIB_RO, UDP_SNDBUF_DEF,
				    "sendspace",
				    "Default UDP send buffer size"),
/* 3*/	[UDPCTL_RECVSPACE]	= RMIB_INT(RMIB_RO, UDP_RCVBUF_DEF,
				    "recvspace",
				    "Default UDP receive buffer size"),
/* 4*/	[UDPCTL_LOOPBACKCKSUM]	= RMIB_FUNC(RMIB_RW | CTLTYPE_INT, sizeof(int),
				    loopif_cksum, "do_loopback_cksum",
				    "Perform UDP checksum on loopback"),
/*+0*/	[UDPCTL_MAXID]		= RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0,
				    udpsock_pcblist, "pcblist",
				    "UDP protocol control block list"),
};

/*
 * The IPv4 ("udp") and IPv6 ("udp6") nodes share the very same table, so the
 * same set of settings shows up in both subtrees.
 */
static struct rmib_node net_inet_udp_node =
    RMIB_NODE(RMIB_RO, net_inet_udp_table, "udp", "UDPv4 related settings");
static struct rmib_node net_inet6_udp6_node =
    RMIB_NODE(RMIB_RO, net_inet_udp_table, "udp6", "UDPv6 related settings");
78 
79 /*
80  * Initialize the UDP sockets module.
81  */
82 void
83 udpsock_init(void)
84 {
85 	unsigned int slot;
86 
87 	/* Initialize the list of free UDP sockets. */
88 	SIMPLEQ_INIT(&udp_freelist);
89 
90 	for (slot = 0; slot < __arraycount(udp_array); slot++)
91 		SIMPLEQ_INSERT_TAIL(&udp_freelist, &udp_array[slot], udp_next);
92 
93 	/* Register the net.inet.udp and net.inet6.udp6 RMIB subtrees. */
94 	mibtree_register_inet(PF_INET, IPPROTO_UDP, &net_inet_udp_node);
95 	mibtree_register_inet(PF_INET6, IPPROTO_UDP, &net_inet6_udp6_node);
96 }
97 
98 /*
99  * A packet has arrived on a UDP socket.  We own the given packet buffer, and
100  * so we must free it if we do not want to keep it.
101  */
102 static void
103 udpsock_input(void * arg, struct udp_pcb * pcb __unused, struct pbuf * pbuf,
104 	const ip_addr_t * ipaddr, uint16_t port)
105 {
106 	struct udpsock *udp = (struct udpsock *)arg;
107 
108 	/* All UDP input processing is handled by pktsock. */
109 	pktsock_input(&udp->udp_pktsock, pbuf, ipaddr, port);
110 }
111 
112 /*
113  * Create a UDP socket.
114  */
115 sockid_t
116 udpsock_socket(int domain, int protocol, struct sock ** sockp,
117 	const struct sockevent_ops ** ops)
118 {
119 	struct udpsock *udp;
120 	unsigned int flags;
121 	uint8_t ip_type;
122 
123 	switch (protocol) {
124 	case 0:
125 	case IPPROTO_UDP:
126 		break;
127 
128 	/* NetBSD does not support IPPROTO_UDPLITE, even though lwIP does. */
129 	default:
130 		return EPROTONOSUPPORT;
131 	}
132 
133 	if (SIMPLEQ_EMPTY(&udp_freelist))
134 		return ENOBUFS;
135 
136 	udp = SIMPLEQ_FIRST(&udp_freelist);
137 
138 	ip_type = pktsock_socket(&udp->udp_pktsock, domain, UDP_SNDBUF_DEF,
139 	    UDP_RCVBUF_DEF, sockp);
140 
141 	/* We should have enough PCBs so this call should not fail.. */
142 	if ((udp->udp_pcb = udp_new_ip_type(ip_type)) == NULL)
143 		return ENOBUFS;
144 	udp_recv(udp->udp_pcb, udpsock_input, (void *)udp);
145 
146 	/* By default, the multicast TTL is 1 and looping is enabled. */
147 	udp_set_multicast_ttl(udp->udp_pcb, 1);
148 
149 	flags = udp_flags(udp->udp_pcb);
150 	udp_setflags(udp->udp_pcb, flags | UDP_FLAGS_MULTICAST_LOOP);
151 
152 	SIMPLEQ_REMOVE_HEAD(&udp_freelist, udp_next);
153 
154 	*ops = &udpsock_ops;
155 	return SOCKID_UDP | (sockid_t)(udp - udp_array);
156 }
157 
158 /*
159  * Bind a UDP socket to a local address.
160  */
161 static int
162 udpsock_bind(struct sock * sock, const struct sockaddr * addr,
163 	socklen_t addr_len, endpoint_t user_endpt)
164 {
165 	struct udpsock *udp = (struct udpsock *)sock;
166 	ip_addr_t ipaddr;
167 	uint16_t port;
168 	err_t err;
169 	int r;
170 
171 	if ((r = ipsock_get_src_addr(udpsock_get_ipsock(udp), addr, addr_len,
172 	    user_endpt, &udp->udp_pcb->local_ip, udp->udp_pcb->local_port,
173 	    TRUE /*allow_mcast*/, &ipaddr, &port)) != OK)
174 		return r;
175 
176 	err = udp_bind(udp->udp_pcb, &ipaddr, port);
177 
178 	return util_convert_err(err);
179 }
180 
181 /*
182  * Connect a UDP socket to a remote address.
183  */
184 static int
185 udpsock_connect(struct sock * sock, const struct sockaddr * addr,
186 	socklen_t addr_len, endpoint_t user_endpt __unused)
187 {
188 	struct udpsock *udp = (struct udpsock *)sock;
189 	struct ifdev *ifdev;
190 	const ip_addr_t *src_addr;
191 	ip_addr_t dst_addr;
192 	uint16_t dst_port;
193 	uint32_t ifindex, ifindex2;
194 	err_t err;
195 	int r;
196 
197 	/*
198 	 * One may "unconnect" socket by providing an address with family
199 	 * AF_UNSPEC.  Providing an <any>:0 address does not achieve the same.
200 	 */
201 	if (addr_is_unspec(addr, addr_len)) {
202 		udp_disconnect(udp->udp_pcb);
203 
204 		return OK;
205 	}
206 
207 	if ((r = ipsock_get_dst_addr(udpsock_get_ipsock(udp), addr,
208 	    addr_len, &udp->udp_pcb->local_ip, &dst_addr, &dst_port)) != OK)
209 		return r;
210 
211 	/*
212 	 * Bind explicitly to a source address if the PCB is not bound to one
213 	 * yet.  This is expected in the BSD socket API, but lwIP does not do
214 	 * it for us.
215 	 */
216 	if (ip_addr_isany(&udp->udp_pcb->local_ip)) {
217 		/* Help the multicast case a bit, if possible. */
218 		ifdev = NULL;
219 
220 		if (ip_addr_ismulticast(&dst_addr)) {
221 			ifindex = pktsock_get_ifindex(&udp->udp_pktsock);
222 			ifindex2 = udp_get_multicast_netif_index(udp->udp_pcb);
223 			if (ifindex == 0)
224 				ifindex = ifindex2;
225 
226 			if (ifindex != 0) {
227 				ifdev = ifdev_get_by_index(ifindex);
228 
229 				if (ifdev == NULL)
230 					return ENXIO;
231 			}
232 		}
233 
234 		src_addr = ifaddr_select(&dst_addr, ifdev, NULL /*ifdevp*/);
235 
236 		if (src_addr == NULL)
237 			return EHOSTUNREACH;
238 
239 		err = udp_bind(udp->udp_pcb, src_addr,
240 		    udp->udp_pcb->local_port);
241 
242 		if (err != ERR_OK)
243 			return util_convert_err(err);
244 	}
245 
246 	/*
247 	 * Connecting a UDP socket serves two main purposes: 1) the socket uses
248 	 * the address as destination when sending, and 2) the socket receives
249 	 * packets from only the connected address.
250 	 */
251 	err = udp_connect(udp->udp_pcb, &dst_addr, dst_port);
252 
253 	if (err != ERR_OK)
254 		return util_convert_err(err);
255 
256 	return OK;
257 }
258 
259 /*
260  * Perform preliminary checks on a send request.
261  */
262 static int
263 udpsock_pre_send(struct sock * sock, size_t len, socklen_t ctl_len __unused,
264 	const struct sockaddr * addr, socklen_t addr_len __unused,
265 	endpoint_t user_endpt __unused, int flags)
266 {
267 	struct udpsock *udp = (struct udpsock *)sock;
268 
269 	if ((flags & ~MSG_DONTROUTE) != 0)
270 		return EOPNOTSUPP;
271 
272 	if (!udpsock_is_conn(udp) && addr == NULL)
273 		return EDESTADDRREQ;
274 
275 	/*
276 	 * This is only one part of the length check.  The rest is done from
277 	 * udpsock_send(), once we have more information.
278 	 */
279 	if (len > ipsock_get_sndbuf(udpsock_get_ipsock(udp)))
280 		return EMSGSIZE;
281 
282 	return OK;
283 }
284 
285 /*
286  * Swap IP-level options between the UDP PCB and the packet options structure,
287  * for all options that have their flag set in the packet options structure.
288  * This function is called twice when sending a packet.  The result is that the
289  * flagged options are overridden for only the packet being sent.
290  */
291 static void
292 udpsock_swap_opt(struct udpsock * udp, struct pktopt * pkto)
293 {
294 	uint8_t tos, ttl, mcast_ttl;
295 
296 	if (pkto->pkto_flags & PKTOF_TOS) {
297 		tos = udp->udp_pcb->tos;
298 		udp->udp_pcb->tos = pkto->pkto_tos;
299 		pkto->pkto_tos = tos;
300 	}
301 
302 	if (pkto->pkto_flags & PKTOF_TTL) {
303 		ttl = udp->udp_pcb->ttl;
304 		mcast_ttl = udp_get_multicast_ttl(udp->udp_pcb);
305 		udp->udp_pcb->ttl = pkto->pkto_ttl;
306 		udp_set_multicast_ttl(udp->udp_pcb, pkto->pkto_mcast_ttl);
307 		pkto->pkto_ttl = ttl;
308 		pkto->pkto_mcast_ttl = mcast_ttl;
309 	}
310 }
311 
/*
 * Send a packet on a UDP socket.
 *
 * The packet data ('data', 'len' bytes) and any ancillary control data ('ctl',
 * 'ctl_len' bytes) are copied in from the caller.  If the socket is not
 * connected, 'addr' and 'addr_len' supply the destination address; if it is
 * connected, the connected remote address is used and 'addr' is ignored.  Of
 * the given 'flags', only MSG_DONTROUTE is tested here.
 *
 * On success, return OK and store the number of sent bytes (which is always
 * 'len') in 'off'.  On failure, return an error code.  This may be any error
 * produced by the pktsock and ipsock helper functions, EHOSTUNREACH if no
 * outgoing interface or source address could be determined or if an IPv6 zone
 * violation was detected, EMSGSIZE if the packet does not leave enough room
 * for headers, ENOBUFS if no packet buffers could be allocated, or the
 * converted lwIP error code from the actual send call.
 */
static int
udpsock_send(struct sock * sock, const struct sockdriver_data * data,
	size_t len, size_t * off, const struct sockdriver_data * ctl,
	socklen_t ctl_len, socklen_t * ctl_off __unused,
	const struct sockaddr * addr, socklen_t addr_len,
	endpoint_t user_endpt __unused, int flags, size_t min __unused)
{
	struct udpsock *udp = (struct udpsock *)sock;
	struct pktopt pktopt;
	struct pbuf *pbuf;
	struct ifdev *ifdev;
	struct netif *netif;
	const ip_addr_t *src_addrp, *dst_addrp;
	ip_addr_t src_addr, dst_addr; /* for storage only; not always used! */
	uint16_t dst_port;
	uint32_t ifindex;
	size_t hdrlen;
	err_t err;
	int r;

	/* Copy in and parse any packet options. */
	pktopt.pkto_flags = 0;

	if ((r = pktsock_get_ctl(&udp->udp_pktsock, ctl, ctl_len,
	    &pktopt)) != OK)
		return r;

	/*
	 * The code below will both determine an outgoing interface and a
	 * source address for the packet.  Even though lwIP could do this for
	 * us in some cases, there are other cases where we must do so
	 * ourselves, with as main reasons 1) the possibility that either or
	 * both have been provided through IPV6_PKTINFO, and 2) our intent to
	 * detect and stop zone violations for (combinations of) scoped IPv6
	 * addresses.  As a result, it is easier to simply take over the
	 * selection tasks from lwIP in their entirety.
	 *
	 * Much of the same applies to rawsock_send() as well.  Functional
	 * differences (e.g. IP_HDRINCL support) as well as the PCB accesses in
	 * the code make it hard to merge the two into a single pktsock copy.
	 * Please do keep the two in sync as much as possible.
	 */

	/*
	 * Start by checking whether the source address and/or the outgoing
	 * interface are overridden using sticky and/or ancillary options.  The
	 * call to pktsock_get_pktinfo(), if successful, will either set
	 * 'ifdev' to NULL, in which case there is no override, or it will set
	 * 'ifdev' to the outgoing interface to use, and (only) in that case
	 * also fill 'src_addr', with an address that may either be a locally
	 * owned unicast address or the unspecified ('any') address.  If it is
	 * a unicast address, that is the source address to use for the packet.
	 * Otherwise, fall back to the address to which the socket is bound,
	 * which may also be the unspecified address or even a multicast
	 * address.  In those cases we will pick a source address further
	 * below.
	 */
	if ((r = pktsock_get_pktinfo(&udp->udp_pktsock, &pktopt, &ifdev,
	    &src_addr)) != OK)
		return r;

	if (ifdev != NULL && !ip_addr_isany(&src_addr)) {
		/* This is guaranteed to be a proper local unicast address. */
		src_addrp = &src_addr;
	} else {
		src_addrp = &udp->udp_pcb->local_ip;

		/*
		 * If the socket is bound to a multicast address, use the
		 * unspecified ('any') address as source address instead, until
		 * we select a real source address (further below).  This
		 * substitution keeps the rest of the code a bit simpler.
		 */
		if (ip_addr_ismulticast(src_addrp))
			src_addrp = IP46_ADDR_ANY(IP_GET_TYPE(src_addrp));
	}

	/*
	 * Determine the destination address to use.  If the socket is
	 * connected, always ignore any address provided in the send call.
	 */
	if (!udpsock_is_conn(udp)) {
		assert(addr != NULL); /* already checked in pre_send */

		if ((r = ipsock_get_dst_addr(udpsock_get_ipsock(udp), addr,
		    addr_len, src_addrp, &dst_addr, &dst_port)) != OK)
			return r;

		dst_addrp = &dst_addr;
	} else {
		dst_addrp = &udp->udp_pcb->remote_ip;
		dst_port = udp->udp_pcb->remote_port;
	}

	/*
	 * If the destination is a multicast address, select the outgoing
	 * interface based on the multicast interface index, if one is set.
	 * This must be done here in order to allow the code further below to
	 * detect zone violations, because if we leave this selection to lwIP,
	 * it will not perform zone violation detection at all.  Also note that
	 * this case must *not* override an interface index already specified
	 * using IPV6_PKTINFO, as per RFC 3542 Sec. 6.7.
	 */
	if (ifdev == NULL && ip_addr_ismulticast(dst_addrp)) {
		ifindex = udp_get_multicast_netif_index(udp->udp_pcb);

		if (ifindex != NETIF_NO_INDEX)
			ifdev = ifdev_get_by_index(ifindex); /* (may fail) */
	}

	/*
	 * If an interface has been determined already now, the send operation
	 * will bypass routing.  In that case, we must perform our own checks
	 * on address zone violations, because those will not be made anywhere
	 * else.  Subsequent steps below will never introduce violations.
	 */
	if (ifdev != NULL && IP_IS_V6(dst_addrp)) {
		if (ifaddr_is_zone_mismatch(ip_2_ip6(dst_addrp), ifdev))
			return EHOSTUNREACH;

		if (IP_IS_V6(src_addrp) &&
		    ifaddr_is_zone_mismatch(ip_2_ip6(src_addrp), ifdev))
			return EHOSTUNREACH;
	}

	/*
	 * If we do not yet have an interface at this point, perform a route
	 * lookup to determine the outgoing interface.  Unless MSG_DONTROUTE is
	 * set (which covers SO_DONTROUTE as well), in which case we look for a
	 * local subnet that matches the destination address.
	 */
	if (ifdev == NULL) {
		if (!(flags & MSG_DONTROUTE)) {
			/*
			 * ip_route() should never be called with an
			 * IPADDR_TYPE_ANY type address.  This is a lwIP-
			 * internal requirement; while we override both routing
			 * functions, we do not deviate from it.
			 */
			if (IP_IS_ANY_TYPE_VAL(*src_addrp))
				src_addrp =
				    IP46_ADDR_ANY(IP_GET_TYPE(dst_addrp));

			/* Perform the route lookup. */
			if ((netif = ip_route(src_addrp, dst_addrp)) == NULL)
				return EHOSTUNREACH;

			ifdev = netif_get_ifdev(netif);
		} else {
			if ((ifdev = ifaddr_map_by_subnet(dst_addrp)) == NULL)
				return EHOSTUNREACH;
		}
	}

	/*
	 * At this point we have an outgoing interface.  If we do not have a
	 * source address yet, pick one now.
	 */
	assert(ifdev != NULL);

	if (ip_addr_isany(src_addrp)) {
		src_addrp = ifaddr_select(dst_addrp, ifdev, NULL /*ifdevp*/);

		if (src_addrp == NULL)
			return EHOSTUNREACH;
	}

	/*
	 * Now that we know the full conditions of what we are about to send,
	 * check whether the packet size leaves enough room for lwIP to prepend
	 * headers.  If so, allocate a chain of pbufs for the packet.
	 */
	assert(len <= UDP_MAX_PAYLOAD);

	if (IP_IS_V6(dst_addrp))
		hdrlen = IP6_HLEN + UDP_HLEN;
	else
		hdrlen = IP_HLEN + UDP_HLEN;

	if (hdrlen + len > UDP_MAX_PAYLOAD)
		return EMSGSIZE;

	if ((pbuf = pchain_alloc(PBUF_TRANSPORT, len)) == NULL)
		return ENOBUFS;

	/* Copy in the packet data.  From here on, we must free the pbuf. */
	if ((r = pktsock_get_data(&udp->udp_pktsock, data, len, pbuf)) != OK) {
		pbuf_free(pbuf);

		return r;
	}

	/*
	 * Set broadcast/multicast flags for accounting purposes.  Only the
	 * multicast flag is used for output accounting, but for loopback
	 * traffic, both flags are copied and used for input accounting and
	 * setting MSG_MCAST/MSG_BCAST.
	 */
	if (ip_addr_ismulticast(dst_addrp))
		pbuf->flags |= PBUF_FLAG_LLMCAST;
	else if (ip_addr_isbroadcast(dst_addrp, ifdev_get_netif(ifdev)))
		pbuf->flags |= PBUF_FLAG_LLBCAST;

	/*
	 * Send the packet.  The per-packet options are swapped into the PCB
	 * right before the send call, and swapped back out right after it.
	 */
	udpsock_swap_opt(udp, &pktopt);

	assert(!ip_addr_isany(src_addrp));
	assert(!ip_addr_ismulticast(src_addrp));

	err = udp_sendto_if_src(udp->udp_pcb, pbuf, dst_addrp, dst_port,
	    ifdev_get_netif(ifdev), src_addrp);

	udpsock_swap_opt(udp, &pktopt);

	/* Free the pbuf, as a copy has been made. */
	pbuf_free(pbuf);

	/*
	 * On success, make sure to return the size of the sent packet as well.
	 * As an aside: ctl_off need not be updated, as it is not returned.
	 */
	if ((r = util_convert_err(err)) == OK)
		*off = len;
	return r;
}
539 
540 /*
541  * Update the set of flag-type socket options on a UDP socket.
542  */
543 static void
544 udpsock_setsockmask(struct sock * sock, unsigned int mask)
545 {
546 	struct udpsock *udp = (struct udpsock *)sock;
547 
548 	if (mask & SO_REUSEADDR)
549 		ip_set_option(udp->udp_pcb, SOF_REUSEADDR);
550 	else
551 		ip_reset_option(udp->udp_pcb, SOF_REUSEADDR);
552 
553 	if (mask & SO_BROADCAST)
554 		ip_set_option(udp->udp_pcb, SOF_BROADCAST);
555 	else
556 		ip_reset_option(udp->udp_pcb, SOF_BROADCAST);
557 }
558 
559 /*
560  * Prepare a helper structure for IP-level option processing.
561  */
562 static void
563 udpsock_get_ipopts(struct udpsock * udp, struct ipopts * ipopts)
564 {
565 
566 	ipopts->local_ip = &udp->udp_pcb->local_ip;
567 	ipopts->remote_ip = &udp->udp_pcb->remote_ip;
568 	ipopts->tos = &udp->udp_pcb->tos;
569 	ipopts->ttl = &udp->udp_pcb->ttl;
570 	ipopts->sndmin = UDP_SNDBUF_MIN;
571 	ipopts->sndmax = UDP_SNDBUF_MAX;
572 	ipopts->rcvmin = UDP_RCVBUF_MIN;
573 	ipopts->rcvmax = UDP_RCVBUF_MAX;
574 }
575 
576 /*
577  * Set socket options on a UDP socket.
578  */
579 static int
580 udpsock_setsockopt(struct sock * sock, int level, int name,
581 	const struct sockdriver_data * data, socklen_t len)
582 {
583 	struct udpsock *udp = (struct udpsock *)sock;
584 	struct ipopts ipopts;
585 	ip_addr_t ipaddr;
586 	struct in_addr in_addr;
587 	struct ifdev *ifdev;
588 	unsigned int flags;
589 	uint32_t ifindex;
590 	uint8_t byte;
591 	int r, val;
592 
593 	/*
594 	 * Unfortunately, we have to duplicate most of the multicast options
595 	 * rather than sharing them with rawsock at the pktsock level.  The
596 	 * reason is that each of the PCBs have their own multicast abstraction
597 	 * functions and so we cannot merge the rest.  Same for getsockopt.
598 	 */
599 
600 	switch (level) {
601 	case IPPROTO_IP:
602 		if (udpsock_is_ipv6(udp))
603 			break;
604 
605 		switch (name) {
606 		case IP_MULTICAST_IF:
607 			pktsock_set_mcaware(&udp->udp_pktsock);
608 
609 			if ((r = sockdriver_copyin_opt(data, &in_addr,
610 			    sizeof(in_addr), len)) != OK)
611 				return r;
612 
613 			ip_addr_set_ip4_u32(&ipaddr, in_addr.s_addr);
614 
615 			if ((ifdev = ifaddr_map_by_addr(&ipaddr)) == NULL)
616 				return EADDRNOTAVAIL;
617 
618 			udp_set_multicast_netif_index(udp->udp_pcb,
619 			    ifdev_get_index(ifdev));
620 
621 			return OK;
622 
623 		case IP_MULTICAST_LOOP:
624 			pktsock_set_mcaware(&udp->udp_pktsock);
625 
626 			if ((r = sockdriver_copyin_opt(data, &byte,
627 			    sizeof(byte), len)) != OK)
628 				return r;
629 
630 			flags = udp_flags(udp->udp_pcb);
631 
632 			if (byte)
633 				flags |= UDP_FLAGS_MULTICAST_LOOP;
634 			else
635 				flags &= ~UDP_FLAGS_MULTICAST_LOOP;
636 
637 			udp_setflags(udp->udp_pcb, flags);
638 
639 			return OK;
640 
641 		case IP_MULTICAST_TTL:
642 			pktsock_set_mcaware(&udp->udp_pktsock);
643 
644 			if ((r = sockdriver_copyin_opt(data, &byte,
645 			    sizeof(byte), len)) != OK)
646 				return r;
647 
648 			udp_set_multicast_ttl(udp->udp_pcb, byte);
649 
650 			return OK;
651 		}
652 
653 		break;
654 
655 	case IPPROTO_IPV6:
656 		if (!udpsock_is_ipv6(udp))
657 			break;
658 
659 		switch (name) {
660 		case IPV6_MULTICAST_IF:
661 			pktsock_set_mcaware(&udp->udp_pktsock);
662 
663 			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
664 			    len)) != OK)
665 				return r;
666 
667 			if (val != 0) {
668 				ifindex = (uint32_t)val;
669 
670 				ifdev = ifdev_get_by_index(ifindex);
671 
672 				if (ifdev == NULL)
673 					return ENXIO;
674 			} else
675 				ifindex = NETIF_NO_INDEX;
676 
677 			udp_set_multicast_netif_index(udp->udp_pcb, ifindex);
678 
679 			return OK;
680 
681 		case IPV6_MULTICAST_LOOP:
682 			pktsock_set_mcaware(&udp->udp_pktsock);
683 
684 			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
685 			    len)) != OK)
686 				return r;
687 
688 			if (val < 0 || val > 1)
689 				return EINVAL;
690 
691 			flags = udp_flags(udp->udp_pcb);
692 
693 			if (val)
694 				flags |= UDP_FLAGS_MULTICAST_LOOP;
695 			else
696 				flags &= ~UDP_FLAGS_MULTICAST_LOOP;
697 
698 			/*
699 			 * lwIP's IPv6 functionality does not actually check
700 			 * this flag at all yet.  We set it in the hope that
701 			 * one day this will magically start working.
702 			 */
703 			udp_setflags(udp->udp_pcb, flags);
704 
705 			return OK;
706 
707 		case IPV6_MULTICAST_HOPS:
708 			pktsock_set_mcaware(&udp->udp_pktsock);
709 
710 			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
711 			    len)) != OK)
712 				return r;
713 
714 			if (val < -1 || val > UINT8_MAX)
715 				return EINVAL;
716 
717 			if (val == -1)
718 				val = 1;
719 
720 			udp_set_multicast_ttl(udp->udp_pcb, val);
721 
722 			return OK;
723 		}
724 
725 		break;
726 	}
727 
728 	/* Handle all other options at the packet or IP level. */
729 	udpsock_get_ipopts(udp, &ipopts);
730 
731 	return pktsock_setsockopt(&udp->udp_pktsock, level, name, data, len,
732 	    &ipopts);
733 }
734 
735 /*
736  * Retrieve socket options on a UDP socket.
737  */
738 static int
739 udpsock_getsockopt(struct sock * sock, int level, int name,
740 	const struct sockdriver_data * data, socklen_t * len)
741 {
742 	struct udpsock *udp = (struct udpsock *)sock;
743 	struct ipopts ipopts;
744 	const ip4_addr_t *ip4addr;
745 	struct in_addr in_addr;
746 	struct ifdev *ifdev;
747 	unsigned int flags;
748 	uint32_t ifindex;
749 	uint8_t byte;
750 	int val;
751 
752 	switch (level) {
753 	case IPPROTO_IP:
754 		if (udpsock_is_ipv6(udp))
755 			break;
756 
757 		switch (name) {
758 		case IP_MULTICAST_IF:
759 			ifindex = udp_get_multicast_netif_index(udp->udp_pcb);
760 
761 			/*
762 			 * Map back from the interface index to the IPv4
763 			 * address assigned to the corresponding interface.
764 			 * Should this not work out, return the 'any' address.
765 			 */
766 			if (ifindex != NETIF_NO_INDEX &&
767 			   (ifdev = ifdev_get_by_index(ifindex)) != NULL) {
768 				ip4addr =
769 				    netif_ip4_addr(ifdev_get_netif(ifdev));
770 
771 				in_addr.s_addr = ip4_addr_get_u32(ip4addr);
772 			} else
773 				in_addr.s_addr = PP_HTONL(INADDR_ANY);
774 
775 			return sockdriver_copyout_opt(data, &in_addr,
776 			    sizeof(in_addr), len);
777 
778 		case IP_MULTICAST_LOOP:
779 			flags = udp_flags(udp->udp_pcb);
780 
781 			byte = !!(flags & UDP_FLAGS_MULTICAST_LOOP);
782 
783 			return sockdriver_copyout_opt(data, &byte,
784 			    sizeof(byte), len);
785 
786 		case IP_MULTICAST_TTL:
787 			byte = udp_get_multicast_ttl(udp->udp_pcb);
788 
789 			return sockdriver_copyout_opt(data, &byte,
790 			    sizeof(byte), len);
791 		}
792 
793 		break;
794 
795 	case IPPROTO_IPV6:
796 		if (!udpsock_is_ipv6(udp))
797 			break;
798 
799 		switch (name) {
800 		case IPV6_MULTICAST_IF:
801 			ifindex = udp_get_multicast_netif_index(udp->udp_pcb);
802 
803 			val = (int)ifindex;
804 
805 			return sockdriver_copyout_opt(data, &val, sizeof(val),
806 			    len);
807 
808 		case IPV6_MULTICAST_LOOP:
809 			flags = udp_flags(udp->udp_pcb);
810 
811 			val = !!(flags & UDP_FLAGS_MULTICAST_LOOP);
812 
813 			return sockdriver_copyout_opt(data, &val, sizeof(val),
814 			    len);
815 
816 		case IPV6_MULTICAST_HOPS:
817 			val = udp_get_multicast_ttl(udp->udp_pcb);
818 
819 			return sockdriver_copyout_opt(data, &val, sizeof(val),
820 			    len);
821 		}
822 
823 		break;
824 	}
825 
826 	/* Handle all other options at the packet or IP level. */
827 	udpsock_get_ipopts(udp, &ipopts);
828 
829 	return pktsock_getsockopt(&udp->udp_pktsock, level, name, data, len,
830 	    &ipopts);
831 }
832 
833 /*
834  * Retrieve the local socket address of a UDP socket.
835  */
836 static int
837 udpsock_getsockname(struct sock * sock, struct sockaddr * addr,
838 	socklen_t * addr_len)
839 {
840 	struct udpsock *udp = (struct udpsock *)sock;
841 
842 	ipsock_put_addr(udpsock_get_ipsock(udp), addr, addr_len,
843 	    &udp->udp_pcb->local_ip, udp->udp_pcb->local_port);
844 
845 	return OK;
846 }
847 
848 /*
849  * Retrieve the remote socket address of a UDP socket.
850  */
851 static int
852 udpsock_getpeername(struct sock * sock, struct sockaddr * addr,
853 	socklen_t * addr_len)
854 {
855 	struct udpsock *udp = (struct udpsock *)sock;
856 
857 	if (!udpsock_is_conn(udp))
858 		return ENOTCONN;
859 
860 	ipsock_put_addr(udpsock_get_ipsock(udp), addr, addr_len,
861 	    &udp->udp_pcb->remote_ip, udp->udp_pcb->remote_port);
862 
863 	return OK;
864 }
865 
866 /*
867  * Shut down a UDP socket for reading and/or writing.
868  */
869 static int
870 udpsock_shutdown(struct sock * sock, unsigned int mask)
871 {
872 	struct udpsock *udp = (struct udpsock *)sock;
873 
874 	if (mask & SFL_SHUT_RD)
875 		udp_recv(udp->udp_pcb, NULL, NULL);
876 
877 	pktsock_shutdown(&udp->udp_pktsock, mask);
878 
879 	return OK;
880 }
881 
882 /*
883  * Close a UDP socket.
884  */
885 static int
886 udpsock_close(struct sock * sock, int force __unused)
887 {
888 	struct udpsock *udp = (struct udpsock *)sock;
889 
890 	udp_recv(udp->udp_pcb, NULL, NULL);
891 
892 	udp_remove(udp->udp_pcb);
893 	udp->udp_pcb = NULL;
894 
895 	pktsock_close(&udp->udp_pktsock);
896 
897 	return OK;
898 }
899 
900 /*
901  * Free up a closed UDP socket.
902  */
903 static void
904 udpsock_free(struct sock * sock)
905 {
906 	struct udpsock *udp = (struct udpsock *)sock;
907 
908 	assert(udp->udp_pcb == NULL);
909 
910 	SIMPLEQ_INSERT_HEAD(&udp_freelist, udp, udp_next);
911 }
912 
913 /*
914  * Fill the given kinfo_pcb sysctl(7) structure with information about the UDP
915  * PCB identified by the given pointer.
916  */
917 static void
918 udpsock_get_info(struct kinfo_pcb * ki, const void * ptr)
919 {
920 	const struct udp_pcb *pcb = (const struct udp_pcb *)ptr;
921 	struct udpsock *udp;
922 
923 	ki->ki_type = SOCK_DGRAM;
924 
925 	/*
926 	 * All UDP sockets should be created by this module, but protect
927 	 * ourselves from the case that that is not true anyway.
928 	 */
929 	if (pcb->recv_arg != NULL) {
930 		udp = (struct udpsock *)pcb->recv_arg;
931 
932 		assert(udp >= udp_array &&
933 		    udp < &udp_array[__arraycount(udp_array)]);
934 	} else
935 		udp = NULL;
936 
937 	ipsock_get_info(ki, &pcb->local_ip, pcb->local_port, &pcb->remote_ip,
938 	    pcb->remote_port);
939 
940 	if (udp != NULL) {
941 		/* TODO: change this so that sockstat(1) may work one day. */
942 		ki->ki_sockaddr = (uint64_t)(uintptr_t)udpsock_get_sock(udp);
943 
944 		ki->ki_rcvq = pktsock_get_recvlen(&udp->udp_pktsock);
945 	}
946 }
947 
948 /*
949  * Given either NULL or a previously returned UDP PCB pointer, return the first
950  * or next UDP PCB pointer, or NULL if there are no more.  Skip UDP PCBs that
951  * are not bound to an address, as there is no use reporting them.
952  */
953 static const void *
954 udpsock_enum(const void * last)
955 {
956 	const struct udp_pcb *pcb;
957 
958 	if (last != NULL)
959 		pcb = (const void *)((const struct udp_pcb *)last)->next;
960 	else
961 		pcb = (const void *)udp_pcbs;
962 
963 	while (pcb != NULL && pcb->local_port == 0)
964 		pcb = pcb->next;
965 
966 	return pcb;
967 }
968 
969 /*
970  * Obtain the list of UDP protocol control blocks, for sysctl(7).
971  */
972 static ssize_t
973 udpsock_pcblist(struct rmib_call * call, struct rmib_node * node __unused,
974 	struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
975 {
976 
977 	return util_pcblist(call, oldp, udpsock_enum, udpsock_get_info);
978 }
979 
/*
 * The table of socket callbacks for UDP sockets, as handed out for each new
 * socket by udpsock_socket().  Sending, addressing, and option handling are
 * implemented in this file.  Receiving is left entirely to the generic pktsock
 * functions, and ioctl calls go to ifconf_ioctl().
 */
static const struct sockevent_ops udpsock_ops = {
	.sop_bind		= udpsock_bind,
	.sop_connect		= udpsock_connect,
	.sop_pre_send		= udpsock_pre_send,
	.sop_send		= udpsock_send,
	.sop_pre_recv		= pktsock_pre_recv,
	.sop_recv		= pktsock_recv,
	.sop_test_recv		= pktsock_test_recv,
	.sop_ioctl		= ifconf_ioctl,
	.sop_setsockmask	= udpsock_setsockmask,
	.sop_setsockopt		= udpsock_setsockopt,
	.sop_getsockopt		= udpsock_getsockopt,
	.sop_getsockname	= udpsock_getsockname,
	.sop_getpeername	= udpsock_getpeername,
	.sop_shutdown		= udpsock_shutdown,
	.sop_close		= udpsock_close,
	.sop_free		= udpsock_free
};
998