xref: /minix/minix/net/lwip/rawsock.c (revision fb9c64b2)
1 /* LWIP service - rawsock.c - RAW sockets */
2 /*
3  * For IPv6 sockets, this module attempts to implement a part of RFC 3542, but
4  * currently not more than what is supported by lwIP and/or what is expected by
5  * a handful of standard utilities (dhcpcd, ping6, traceroute6..).
6  *
7  * For general understanding, be aware that IPv4 raw sockets always receive
8  * packets including the IP header, and may be used to send packets including
9  * the IP header if IP_HDRINCL is set, while IPv6 raw sockets always send and
10  * receive actual payloads only, using ancillary (control) data to set and
11  * retrieve per-packet IP header fields.
12  *
13  * For packet headers we follow general BSD semantics.  For example, some IPv4
14  * header fields are swapped both when sending and when receiving.  Also, like
15  * on NetBSD, IPPROTO_RAW is not a special value in any way.
16  */
17 
18 #include "lwip.h"
19 #include "ifaddr.h"
20 #include "pktsock.h"
21 
22 #include "lwip/raw.h"
23 #include "lwip/inet_chksum.h"
24 
25 #include <net/route.h>
26 #include <netinet/icmp6.h>
27 #include <netinet/ip.h>
28 #include <netinet/in_pcb.h>
29 
30 /* The number of RAW sockets.  Inherited from the lwIP configuration. */
31 #define NR_RAWSOCK	MEMP_NUM_RAW_PCB
32 
33 /*
34  * Outgoing packets are not getting buffered, so the send buffer size simply
35  * determines the maximum size for sent packets.  The send buffer maximum is
36  * therefore limited to the maximum size of a single packet (64K-1 bytes),
37  * which is already enforced by lwIP's 16-bit length parameter to pbuf_alloc().
38  *
39  * The actual transmission may enforce a lower limit, though.  The full packet
40  * size must not exceed the same 64K-1 limit, and that includes any headers
41  * that still have to be prepended to the given packet.  The size of those
42  * headers depends on the socket type (IPv4/IPv6) and the IP_HDRINCL setting.
43  *
44  * The default is equal to the maximum here, because if a (by definition,
45  * privileged) application wishes to send large raw packets, it probably has a
46  * good reason, and we do not want to get in its way.
47  */
48 #define RAW_MAX_PAYLOAD	(UINT16_MAX)
49 
50 #define RAW_SNDBUF_MIN	1		/* minimum RAW send buffer size */
51 #define RAW_SNDBUF_DEF	RAW_MAX_PAYLOAD	/* default RAW send buffer size */
52 #define RAW_SNDBUF_MAX	RAW_MAX_PAYLOAD	/* maximum RAW send buffer size */
53 #define RAW_RCVBUF_MIN	MEMPOOL_BUFSIZE	/* minimum RAW receive buffer size */
54 #define RAW_RCVBUF_DEF	32768		/* default RAW receive buffer size */
55 #define RAW_RCVBUF_MAX	65536		/* maximum RAW receive buffer size */
56 
/*
 * Per-socket state for one RAW socket.  All sockets live in the statically
 * sized raw_array; a slot is on exactly one of the two lists below at any
 * time, linked through raw_next.
 *
 * raw_pktsock must remain the first member: the socket callbacks in this file
 * cast the 'struct sock' pointer they are given directly to 'struct rawsock'
 * (presumably 'struct sock' sits at the very start of 'struct pktsock' --
 * confirm in pktsock.h/ipsock.h).
 */
static struct rawsock {
	struct pktsock raw_pktsock;		/* packet socket object */
	struct raw_pcb *raw_pcb;		/* lwIP RAW control block */
	TAILQ_ENTRY(rawsock) raw_next;		/* next in active/free list */
	struct icmp6_filter raw_icmp6filter;	/* ICMPv6 type filter */
} raw_array[NR_RAWSOCK];

/*
 * rawsock_init() fills the free list with all slots; rawsock_socket() takes
 * slots from the head of the free list and appends them to the active list.
 */
static TAILQ_HEAD(, rawsock) raw_freelist;	/* list of free RAW sockets */
static TAILQ_HEAD(, rawsock) raw_activelist;	/* list, in-use RAW sockets */

/* Callback table handed out by rawsock_socket(); declared here for early use. */
static const struct sockevent_ops rawsock_ops;

/* Accessors for the more generic socket objects embedded in a RAW socket. */
#define rawsock_get_sock(raw)	(ipsock_get_sock(rawsock_get_ipsock(raw)))
#define rawsock_get_ipsock(raw)	(pktsock_get_ipsock(&(raw)->raw_pktsock))
/* Address family tests, answered by the embedded IP socket object. */
#define rawsock_is_ipv6(raw)	(ipsock_is_ipv6(rawsock_get_ipsock(raw)))
#define rawsock_is_v6only(raw)	(ipsock_is_v6only(rawsock_get_ipsock(raw)))
/* Nonzero (not necessarily 1) if the PCB has the 'connected' flag set. */
#define rawsock_is_conn(raw)	\
	(raw_flags((raw)->raw_pcb) & RAW_FLAGS_CONNECTED)
/* Nonzero (not necessarily 1) if the PCB has the IP_HDRINCL flag set. */
#define rawsock_is_hdrincl(raw)	\
	(raw_flags((raw)->raw_pcb) & RAW_FLAGS_HDRINCL)
77 
/* Handler for the "pcblist" node below; defined elsewhere in this file. */
static ssize_t rawsock_pcblist(struct rmib_call *, struct rmib_node *,
	struct rmib_oldp *, struct rmib_newp *);

/* The CTL_NET {PF_INET,PF_INET6} IPPROTO_RAW subtree. */
/* All dynamically numbered; the sendspace/recvspace entries are ours. */
static struct rmib_node net_inet_raw_table[] = {
	/* Read-only views of the compile-time default buffer sizes. */
	RMIB_INT(RMIB_RO, RAW_SNDBUF_DEF, "sendspace",
	    "Default RAW send buffer size"),
	RMIB_INT(RMIB_RO, RAW_RCVBUF_DEF, "recvspace",
	    "Default RAW receive buffer size"),
	/* Function-backed node; its contents are produced by the handler. */
	RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0, rawsock_pcblist, "pcblist",
	    "RAW IP protocol control block list"),
};

/*
 * The IPv4 ("raw") and IPv6 ("raw6") nodes deliberately share one table, so
 * both subtrees expose the exact same set of entries.
 */
static struct rmib_node net_inet_raw_node =
    RMIB_NODE(RMIB_RO, net_inet_raw_table, "raw", "RAW IPv4 settings");
static struct rmib_node net_inet6_raw6_node =
    RMIB_NODE(RMIB_RO, net_inet_raw_table, "raw6", "RAW IPv6 settings");
96 
97 /*
98  * Initialize the raw sockets module.
99  */
100 void
101 rawsock_init(void)
102 {
103 	unsigned int slot;
104 
105 	/* Initialize the list of free RAW sockets. */
106 	TAILQ_INIT(&raw_freelist);
107 
108 	for (slot = 0; slot < __arraycount(raw_array); slot++)
109 		TAILQ_INSERT_TAIL(&raw_freelist, &raw_array[slot], raw_next);
110 
111 	/* Initialize the list of active RAW sockets. */
112 	TAILQ_INIT(&raw_activelist);
113 
114 	/* Register the net.inet.raw and net.inet6.raw6 RMIB subtrees. */
115 	mibtree_register_inet(PF_INET, IPPROTO_RAW, &net_inet_raw_node);
116 	mibtree_register_inet(PF_INET6, IPPROTO_RAW, &net_inet6_raw6_node);
117 }
118 
119 /*
120  * Check whether the given arrived IPv6 packet is fit to be received on the
121  * given raw socket.
122  */
123 static int
124 rawsock_check_v6(struct rawsock * raw, struct pbuf * pbuf)
125 {
126 	uint8_t type;
127 
128 	assert(rawsock_is_ipv6(raw));
129 
130 	/*
131 	 * For ICMPv6 packets, test against the configured type filter.
132 	 */
133 	if (raw->raw_pcb->protocol == IPPROTO_ICMPV6) {
134 		if (pbuf->len < offsetof(struct icmp6_hdr, icmp6_dataun))
135 			return FALSE;
136 
137 		memcpy(&type, &((struct icmp6_hdr *)pbuf->payload)->icmp6_type,
138 		    sizeof(type));
139 
140 		if (!ICMP6_FILTER_WILLPASS((int)type, &raw->raw_icmp6filter))
141 			return FALSE;
142 	}
143 
144 	/*
145 	 * For ICMPv6 packets, or if IPV6_CHECKSUM is enabled, we have to
146 	 * verify the checksum of the packet before passing it to the user.
147 	 * This is costly, but it needs to be done and lwIP is not doing it for
148 	 * us (as of writing, anyway), even though it maintains the offset..
149 	 */
150 	if (raw->raw_pcb->chksum_reqd &&
151 	    (pbuf->tot_len < raw->raw_pcb->chksum_offset + sizeof(uint16_t) ||
152 	    ip6_chksum_pseudo(pbuf, raw->raw_pcb->protocol, pbuf->tot_len,
153 	    ip6_current_src_addr(), ip6_current_dest_addr()) != 0)) {
154 		return FALSE;
155 	}
156 
157 	/* No reason to filter out this packet. */
158 	return TRUE;
159 }
160 
161 /*
162  * Adjust the given arrived IPv4 packet by changing the length and offset
163  * fields to host-byte order, as is done by the BSDs.  This effectively mirrors
164  * the swapping part of the preparation done on IPv4 packets being sent if the
165  * IP_HDRINCL socket option is enabled.
166  */
167 static void
168 rawsock_adjust_v4(struct pbuf * pbuf)
169 {
170 	struct ip_hdr *iphdr;
171 
172 	if (pbuf->len < sizeof(struct ip_hdr))
173 		return;
174 
175 	iphdr = (struct ip_hdr *)pbuf->payload;
176 
177 	/*
178 	 * W. Richard Stevens also mentions ip_id, but at least on NetBSD that
179 	 * field seems to be swapped neither when sending nor when receiving..
180 	 */
181 	IPH_LEN(iphdr) = htons(IPH_LEN(iphdr));
182 	IPH_OFFSET(iphdr) = htons(IPH_OFFSET(iphdr));
183 }
184 
/*
 * A packet has arrived on a raw socket.  Since the same packet may have to be
 * delivered to multiple raw sockets, we always return 0 (= not consumed) from
 * this function.  As such, we must make a copy of the given packet if we want
 * to keep it, and never free it.
 *
 * This is the receive callback registered with raw_recv() in
 * rawsock_socket().  'arg' is the rawsock object given at registration time,
 * 'pcb' is its lwIP control block (referenced by the assert only), 'psrc' is
 * the arrived packet, and 'srcaddr' is passed on unchanged to pktsock_input().
 * Any header adjustment made on 'psrc' is undone before returning, on all
 * paths, so that the caller gets its packet back in the original state.
 */
static uint8_t
rawsock_input(void * arg, struct raw_pcb * pcb __unused, struct pbuf * psrc,
	const ip_addr_t * srcaddr)
{
	struct rawsock *raw = (struct rawsock *)arg;
	struct pbuf *pbuf;
	int off, hdrlen;

	assert(raw->raw_pcb == pcb);

	/*
	 * If adding this packet would cause the receive buffer to go beyond
	 * the current limit, drop the new packet.  This is just an estimation,
	 * because the copy we are about to make may not take the exact same
	 * amount of memory, due to the fact that 1) the pbuf we're given has
	 * an unknown set of headers in front of it, and 2) we need to store
	 * extra information in our copy.  The return value of this call, if
	 * not -1, is the number of bytes we need to reserve to store that
	 * extra information.
	 */
	if ((hdrlen = pktsock_test_input(&raw->raw_pktsock, psrc)) < 0)
		return 0;

	/*
	 * Raw IPv6 sockets receive only the actual packet data, whereas raw
	 * IPv4 sockets receive the IP header as well.
	 */
	if (ip_current_is_v6()) {
		off = ip_current_header_tot_len();

		/* Temporarily hide the IPv6 header(s) from view. */
		util_pbuf_header(psrc, -off);

		if (!rawsock_check_v6(raw, psrc)) {
			/* Filtered out: restore the headers and bail. */
			util_pbuf_header(psrc, off);

			return 0;
		}
	} else {
		/*
		 * For IPv6 sockets, drop the packet if it was sent as an IPv4
		 * packet and checksumming is enabled (this includes ICMPv6).
		 * Otherwise, the packet would bypass the above checks that we
		 * perform on IPv6 packets.  Applications that want to use a
		 * dual-stack protocol with checksumming will have to do the
		 * checksum verification part themselves.  Presumably the two
		 * different pseudoheaders would result in different checksums
		 * anyhow, so it would be useless to try to support that.
		 *
		 * Beyond that, for IPv4 packets on IPv6 sockets, hide the IPv4
		 * header.
		 */
		if (rawsock_is_ipv6(raw)) {
			/* Nothing has been adjusted yet at this point. */
			if (raw->raw_pcb->chksum_reqd)
				return 0;

			off = IP_HLEN;

			util_pbuf_header(psrc, -off);
		} else
			off = 0;
	}

	/*
	 * We need to make a copy of the incoming packet.  If we eat the one
	 * given to us, this will 1) stop any other raw sockets from getting
	 * the same packet, 2) allow a single raw socket to discard all TCP/UDP
	 * traffic, and 3) present us with a problem on how to store ancillary
	 * data.  Raw sockets are not that performance critical so the extra
	 * copy -even when not always necessary- is not that big of a deal.
	 */
	if ((pbuf = pchain_alloc(PBUF_RAW, hdrlen + psrc->tot_len)) == NULL) {
		/* Out of buffers: drop the packet, but restore it first. */
		if (off > 0)
			util_pbuf_header(psrc, off);

		return 0;
	}

	/* Skip the reserved area, so that the copy lands right behind it. */
	util_pbuf_header(pbuf, -hdrlen);

	if (pbuf_copy(pbuf, psrc) != ERR_OK)
		panic("unexpected pbuf copy failure");

	/* Carry over the link-layer multicast/broadcast indications. */
	pbuf->flags |= psrc->flags & (PBUF_FLAG_LLMCAST | PBUF_FLAG_LLBCAST);

	/* The copy is complete; give the source packet its headers back. */
	if (off > 0)
		util_pbuf_header(psrc, off);

	/* Only IPv4 sockets get to see the IPv4 header; swap fields in it. */
	if (!rawsock_is_ipv6(raw))
		rawsock_adjust_v4(pbuf);

	pktsock_input(&raw->raw_pktsock, pbuf, srcaddr, 0);

	return 0;
}
285 
286 /*
287  * Create a raw socket.
288  */
289 sockid_t
290 rawsock_socket(int domain, int protocol, struct sock ** sockp,
291 	const struct sockevent_ops ** ops)
292 {
293 	struct rawsock *raw;
294 	unsigned int flags;
295 	uint8_t ip_type;
296 
297 	if (protocol < 0 || protocol > UINT8_MAX)
298 		return EPROTONOSUPPORT;
299 
300 	if (TAILQ_EMPTY(&raw_freelist))
301 		return ENOBUFS;
302 
303 	raw = TAILQ_FIRST(&raw_freelist);
304 
305 	/*
306 	 * Initialize the structure.  Do not memset it to zero, as it is still
307 	 * part of the linked free list.  Initialization may still fail.
308 	 */
309 
310 	ip_type = pktsock_socket(&raw->raw_pktsock, domain, RAW_SNDBUF_DEF,
311 	    RAW_RCVBUF_DEF, sockp);
312 
313 	/* We should have enough PCBs so this call should not fail.. */
314 	if ((raw->raw_pcb = raw_new_ip_type(ip_type, protocol)) == NULL)
315 		return ENOBUFS;
316 	raw_recv(raw->raw_pcb, rawsock_input, (void *)raw);
317 
318 	/* By default, the multicast TTL is 1 and looping is enabled. */
319 	raw_set_multicast_ttl(raw->raw_pcb, 1);
320 
321 	flags = raw_flags(raw->raw_pcb);
322 	raw_setflags(raw->raw_pcb, flags | RAW_FLAGS_MULTICAST_LOOP);
323 
324 	/*
325 	 * For ICMPv6, checksum generation and verification is mandatory and
326 	 * type filtering of incoming packets is supported (RFC 3542).  For all
327 	 * other IPv6 protocols, checksumming may be turned on by the user.
328 	 */
329 	if (rawsock_is_ipv6(raw) && protocol == IPPROTO_ICMPV6) {
330 		raw->raw_pcb->chksum_reqd = 1;
331 		raw->raw_pcb->chksum_offset =
332 		    offsetof(struct icmp6_hdr, icmp6_cksum);
333 
334 		ICMP6_FILTER_SETPASSALL(&raw->raw_icmp6filter);
335 	} else
336 		raw->raw_pcb->chksum_reqd = 0;
337 
338 	TAILQ_REMOVE(&raw_freelist, raw, raw_next);
339 
340 	TAILQ_INSERT_TAIL(&raw_activelist, raw, raw_next);
341 
342 	*ops = &rawsock_ops;
343 	return SOCKID_RAW | (sockid_t)(raw - raw_array);
344 }
345 
346 /*
347  * Bind a raw socket to a local address.
348  */
349 static int
350 rawsock_bind(struct sock * sock, const struct sockaddr * addr,
351 	socklen_t addr_len, endpoint_t user_endpt)
352 {
353 	struct rawsock *raw = (struct rawsock *)sock;
354 	ip_addr_t ipaddr;
355 	err_t err;
356 	int r;
357 
358 	/*
359 	 * Raw sockets may be rebound even if that is not too useful.  However,
360 	 * we do not allow (re)binding when the socket is connected, so as to
361 	 * eliminate any problems with source and destination type mismatches:
362 	 * such mismatches are detected at connect time, and rebinding would
363 	 * avoid those, possibly triggering lwIP asserts as a result.
364 	 */
365 	if (rawsock_is_conn(raw))
366 		return EINVAL;
367 
368 	if ((r = ipsock_get_src_addr(rawsock_get_ipsock(raw), addr, addr_len,
369 	    user_endpt, &raw->raw_pcb->local_ip, 0 /*local_port*/,
370 	    TRUE /*allow_mcast*/, &ipaddr, NULL /*portp*/)) != OK)
371 		return r;
372 
373 	err = raw_bind(raw->raw_pcb, &ipaddr);
374 
375 	return util_convert_err(err);
376 }
377 
378 /*
379  * Connect a raw socket to a remote address.
380  */
381 static int
382 rawsock_connect(struct sock * sock, const struct sockaddr * addr,
383 	socklen_t addr_len, endpoint_t user_endpt __unused)
384 {
385 	struct rawsock *raw = (struct rawsock *)sock;
386 	const ip_addr_t *src_addr;
387 	ip_addr_t dst_addr;
388 	struct ifdev *ifdev;
389 	uint32_t ifindex, ifindex2;
390 	err_t err;
391 	int r;
392 
393 	/*
394 	 * One may "unconnect" socket by providing an address with family
395 	 * AF_UNSPEC.
396 	 */
397 	if (addr_is_unspec(addr, addr_len)) {
398 		raw_disconnect(raw->raw_pcb);
399 
400 		return OK;
401 	}
402 
403 	if ((r = ipsock_get_dst_addr(rawsock_get_ipsock(raw), addr, addr_len,
404 	    &raw->raw_pcb->local_ip, &dst_addr, NULL /*dst_port*/)) != OK)
405 		return r;
406 
407 	/*
408 	 * Bind explicitly to a source address if the PCB is not bound to one
409 	 * yet.  This is expected in the BSD socket API, but lwIP does not do
410 	 * it for us.
411 	 */
412 	if (ip_addr_isany(&raw->raw_pcb->local_ip)) {
413 		/* Help the multicast case a bit, if possible. */
414 		ifdev = NULL;
415 		if (ip_addr_ismulticast(&dst_addr)) {
416 			ifindex = pktsock_get_ifindex(&raw->raw_pktsock);
417 			ifindex2 = raw_get_multicast_netif_index(raw->raw_pcb);
418 			if (ifindex == 0)
419 				ifindex = ifindex2;
420 
421 			if (ifindex != 0) {
422 				ifdev = ifdev_get_by_index(ifindex);
423 
424 				if (ifdev == NULL)
425 					return ENXIO;
426 			}
427 		}
428 
429 		src_addr = ifaddr_select(&dst_addr, ifdev, NULL /*ifdevp*/);
430 
431 		if (src_addr == NULL)
432 			return EHOSTUNREACH;
433 
434 		err = raw_bind(raw->raw_pcb, src_addr);
435 
436 		if (err != ERR_OK)
437 			return util_convert_err(err);
438 	}
439 
440 	/*
441 	 * Connecting a raw socket serves two main purposes: 1) the socket uses
442 	 * the address as destination when sending, and 2) the socket receives
443 	 * packets from only the connected address.
444 	 */
445 	err = raw_connect(raw->raw_pcb, &dst_addr);
446 
447 	if (err != ERR_OK)
448 		return util_convert_err(err);
449 
450 	return OK;
451 }
452 
453 /*
454  * Perform preliminary checks on a send request.
455  */
456 static int
457 rawsock_pre_send(struct sock * sock, size_t len, socklen_t ctl_len __unused,
458 	const struct sockaddr * addr, socklen_t addr_len __unused,
459 	endpoint_t user_endpt __unused, int flags)
460 {
461 	struct rawsock *raw = (struct rawsock *)sock;
462 
463 	if ((flags & ~MSG_DONTROUTE) != 0)
464 		return EOPNOTSUPP;
465 
466 	if (!rawsock_is_conn(raw) && addr == NULL)
467 		return EDESTADDRREQ;
468 
469 	/*
470 	 * This is only one part of the length check.  The rest is done from
471 	 * rawsock_send(), once we have more information.
472 	 */
473 	if (len > ipsock_get_sndbuf(rawsock_get_ipsock(raw)))
474 		return EMSGSIZE;
475 
476 	return OK;
477 }
478 
479 /*
480  * Swap IP-level options between the RAW PCB and the packet options structure,
481  * for all options that have their flag set in the packet options structure.
482  * This function is called twice when sending a packet.  The result is that the
483  * flagged options are overridden for only the packet being sent.
484  */
485 static void
486 rawsock_swap_opt(struct rawsock * raw, struct pktopt * pkto)
487 {
488 	uint8_t tos, ttl, mcast_ttl;
489 
490 	if (pkto->pkto_flags & PKTOF_TOS) {
491 		tos = raw->raw_pcb->tos;
492 		raw->raw_pcb->tos = pkto->pkto_tos;
493 		pkto->pkto_tos = tos;
494 	}
495 
496 	if (pkto->pkto_flags & PKTOF_TTL) {
497 		ttl = raw->raw_pcb->ttl;
498 		mcast_ttl = raw_get_multicast_ttl(raw->raw_pcb);
499 		raw->raw_pcb->ttl = pkto->pkto_ttl;
500 		raw_set_multicast_ttl(raw->raw_pcb, pkto->pkto_ttl);
501 		pkto->pkto_ttl = ttl;
502 		pkto->pkto_mcast_ttl = mcast_ttl;
503 	}
504 }
505 
506 /*
507  * We are about to send the given packet that already includes an IPv4 header,
508  * because the IP_HDRINCL option is enabled on a raw IPv4 socket.  Prepare the
509  * IPv4 header for sending, by modifying a few fields in it, as expected by
510  * userland.
511  */
512 static int
513 rawsock_prepare_hdrincl(struct rawsock * raw, struct pbuf * pbuf,
514 	const ip_addr_t * src_addr)
515 {
516 	struct ip_hdr *iphdr;
517 	size_t hlen;
518 
519 	/*
520 	 * lwIP obtains the destination address from the IP packet header in
521 	 * this case, so make sure the packet has a full-sized header.
522 	 */
523 	if (pbuf->len < sizeof(struct ip_hdr))
524 		return EINVAL;
525 
526 	iphdr = (struct ip_hdr *)pbuf->payload;
527 
528 	/*
529 	 * Fill in the source address if it is not set, and do the byte
530 	 * swapping and checksum computation common for the BSDs, without which
531 	 * ping(8) and traceroute(8) do not work properly.  We consider this a
532 	 * convenience feature, so malformed packets are simply sent as is.
533 	 * TODO: deal with type punning..
534 	 */
535 	hlen = (size_t)IPH_HL(iphdr) << 2;
536 
537 	if (pbuf->len >= hlen) {
538 		/* Fill in the source address if it is blank. */
539 		if (iphdr->src.addr == PP_HTONL(INADDR_ANY)) {
540 			assert(IP_IS_V4(src_addr));
541 
542 			iphdr->src.addr = ip_addr_get_ip4_u32(src_addr);
543 		}
544 
545 		IPH_LEN(iphdr) = htons(IPH_LEN(iphdr));
546 		IPH_OFFSET(iphdr) = htons(IPH_OFFSET(iphdr));
547 		IPH_CHKSUM(iphdr) = 0;
548 
549 		IPH_CHKSUM(iphdr) = inet_chksum(iphdr, hlen);
550 	}
551 
552 	return OK;
553 }
554 
/*
 * Send a packet on a raw socket.
 *
 * The packet data ('data', 'len' bytes) and any control data ('ctl',
 * 'ctl_len' bytes) are copied in from the caller.  If the socket is not
 * connected, 'addr' and 'addr_len' provide the destination address; for a
 * connected socket they are ignored.  Of 'flags', only MSG_DONTROUTE is acted
 * upon here.  On success, OK is returned and 'off' is set to the number of
 * bytes sent, which is always the full packet length.  On failure, an error
 * code is returned; among others, this may be EHOSTUNREACH (no usable
 * interface, route, or source address, or an address zone violation),
 * EMSGSIZE (no room left for IP headers), ENOBUFS (out of packet buffers), or
 * EINVAL (packet too small for the configured checksum or for IP_HDRINCL).
 *
 * NOTE(review): 'ctl' and 'ctl_len' are marked __unused in the signature even
 * though they are passed to pktsock_get_ctl() below.  This is harmless, but
 * the markers are misleading and could be dropped.
 */
static int
rawsock_send(struct sock * sock, const struct sockdriver_data * data,
	size_t len, size_t * off, const struct sockdriver_data * ctl __unused,
	socklen_t ctl_len __unused, socklen_t * ctl_off __unused,
	const struct sockaddr * addr, socklen_t addr_len,
	endpoint_t user_endpt __unused, int flags, size_t min __unused)
{
	struct rawsock *raw = (struct rawsock *)sock;
	struct pktopt pktopt;
	struct pbuf *pbuf;
	struct ifdev *ifdev;
	struct netif *netif;
	const ip_addr_t *dst_addrp, *src_addrp;
	ip_addr_t src_addr, dst_addr; /* for storage only; not always used! */
	size_t hdrlen;
	uint32_t ifindex;
	err_t err;
	int r;

	/* Copy in and parse any packet options. */
	pktopt.pkto_flags = 0;

	if ((r = pktsock_get_ctl(&raw->raw_pktsock, ctl, ctl_len,
	    &pktopt)) != OK)
		return r;

	/*
	 * For a more in-depth explanation of what is going on here, see the
	 * udpsock module, which has largely the same code but with more
	 * elaborate comments.
	 */

	/*
	 * Start by checking whether the source address and/or the outgoing
	 * interface are overridden using sticky and/or ancillary options.
	 */
	if ((r = pktsock_get_pktinfo(&raw->raw_pktsock, &pktopt, &ifdev,
	    &src_addr)) != OK)
		return r;

	if (ifdev != NULL && !ip_addr_isany(&src_addr)) {
		/* This is guaranteed to be a proper local unicast address. */
		src_addrp = &src_addr;
	} else {
		src_addrp = &raw->raw_pcb->local_ip;

		/*
		 * If the socket is bound to a multicast address, use the
		 * unspecified ('any') address as source address instead.  A
		 * real source address will then be selected further below.
		 */
		if (ip_addr_ismulticast(src_addrp))
			src_addrp = IP46_ADDR_ANY(IP_GET_TYPE(src_addrp));
	}

	/*
	 * Determine the destination address to use.  If the socket is
	 * connected, always ignore any address provided in the send call.
	 */
	if (!rawsock_is_conn(raw)) {
		assert(addr != NULL); /* already checked in pre_send */

		if ((r = ipsock_get_dst_addr(rawsock_get_ipsock(raw), addr,
		    addr_len, src_addrp, &dst_addr, NULL /*dst_port*/)) != OK)
			return r;

		dst_addrp = &dst_addr;
	} else
		dst_addrp = &raw->raw_pcb->remote_ip;

	/*
	 * If the destination is a multicast address, select the outgoing
	 * interface based on the multicast interface index, if one is set.
	 * This must however *not* override an interface index already
	 * specified using IPV6_PKTINFO, as per RFC 3542 Sec. 6.7.
	 */
	if (ifdev == NULL && ip_addr_ismulticast(dst_addrp)) {
		ifindex = raw_get_multicast_netif_index(raw->raw_pcb);

		if (ifindex != NETIF_NO_INDEX)
			ifdev = ifdev_get_by_index(ifindex); /* (may fail) */
	}

	/*
	 * If an interface has been determined already now, the send operation
	 * will bypass routing.  In that case, we must perform our own checks
	 * on address zone violations, because those will not be made anywhere
	 * else.  Subsequent steps below will never introduce violations.
	 */
	if (ifdev != NULL && IP_IS_V6(dst_addrp)) {
		if (ifaddr_is_zone_mismatch(ip_2_ip6(dst_addrp), ifdev))
			return EHOSTUNREACH;

		if (IP_IS_V6(src_addrp) &&
		    ifaddr_is_zone_mismatch(ip_2_ip6(src_addrp), ifdev))
			return EHOSTUNREACH;
	}

	/*
	 * If we do not yet have an interface at this point, perform a route
	 * lookup to determine the outgoing interface, unless MSG_DONTROUTE is
	 * set.
	 */
	if (ifdev == NULL) {
		if (!(flags & MSG_DONTROUTE)) {
			/*
			 * ip_route() should never be called with an
			 * IPADDR_TYPE_ANY type address.  This is a lwIP-
			 * internal requirement; while we override both routing
			 * functions, we do not deviate from it.
			 */
			if (IP_IS_ANY_TYPE_VAL(*src_addrp))
				src_addrp =
				    IP46_ADDR_ANY(IP_GET_TYPE(dst_addrp));

			/* Perform the route lookup. */
			if ((netif = ip_route(src_addrp, dst_addrp)) == NULL)
				return EHOSTUNREACH;

			ifdev = netif_get_ifdev(netif);
		} else {
			/* No routing: the destination must be on-link. */
			if ((ifdev = ifaddr_map_by_subnet(dst_addrp)) == NULL)
				return EHOSTUNREACH;
		}
	}

	/*
	 * At this point we have an outgoing interface.  If we do not have a
	 * source address yet, pick one now.  As a sidenote, if the destination
	 * address is scoped but has no zone, we could also fill in the zone
	 * now.  We let lwIP handle that instead, though.
	 */
	assert(ifdev != NULL);

	if (ip_addr_isany(src_addrp)) {
		src_addrp = ifaddr_select(dst_addrp, ifdev, NULL /*ifdevp*/);

		if (src_addrp == NULL)
			return EHOSTUNREACH;
	}

	/*
	 * Now that we know the full conditions of what we are about to send,
	 * check whether the packet size leaves enough room for lwIP to prepend
	 * headers.  If so, allocate a chain of pbufs for the packet.
	 */
	assert(len <= RAW_MAX_PAYLOAD);

	/* With IP_HDRINCL, the IP header is part of the given packet. */
	if (rawsock_is_hdrincl(raw))
		hdrlen = 0;
	else if (IP_IS_V6(dst_addrp))
		hdrlen = IP6_HLEN;
	else
		hdrlen = IP_HLEN;

	if (hdrlen + len > RAW_MAX_PAYLOAD)
		return EMSGSIZE;

	if ((pbuf = pchain_alloc(PBUF_IP, len)) == NULL)
		return ENOBUFS;

	/* Copy in the packet data. */
	if ((r = pktsock_get_data(&raw->raw_pktsock, data, len, pbuf)) != OK) {
		pbuf_free(pbuf);

		return r;
	}

	/*
	 * If the user has turned on IPV6_CHECKSUM, ensure that the packet is
	 * not only large enough to have the checksum stored at the configured
	 * place, but also that the checksum fits within the first pbuf: if we
	 * do not test this here, an assert will trigger in lwIP later.  Also
	 * zero out the checksum field first, because lwIP does not do that.
	 */
	if (raw->raw_pcb->chksum_reqd) {
		if (pbuf->len < raw->raw_pcb->chksum_offset +
		    sizeof(uint16_t)) {
			pbuf_free(pbuf);

			return EINVAL;
		}

		memset((char *)pbuf->payload + raw->raw_pcb->chksum_offset, 0,
		    sizeof(uint16_t));
	}

	/*
	 * For sockets where an IPv4 header is already included in the packet,
	 * we need to alter a few header fields to be compatible with BSD.
	 */
	if (rawsock_is_hdrincl(raw) &&
	    (r = rawsock_prepare_hdrincl(raw, pbuf, src_addrp)) != OK) {
		pbuf_free(pbuf);

		return r;
	}

	/* Set broadcast/multicast flags for accounting purposes. */
	if (ip_addr_ismulticast(dst_addrp))
		pbuf->flags |= PBUF_FLAG_LLMCAST;
	else if (ip_addr_isbroadcast(dst_addrp, ifdev_get_netif(ifdev)))
		pbuf->flags |= PBUF_FLAG_LLBCAST;

	/*
	 * Send the packet.  The two swap calls around the transmission apply
	 * any per-packet option overrides and then undo them again; they must
	 * remain paired on all paths.
	 */
	rawsock_swap_opt(raw, &pktopt);

	assert(!ip_addr_isany(src_addrp));
	assert(!ip_addr_ismulticast(src_addrp));

	err = raw_sendto_if_src(raw->raw_pcb, pbuf, dst_addrp,
	    ifdev_get_netif(ifdev), src_addrp);

	rawsock_swap_opt(raw, &pktopt);

	/* Free the pbuf again. */
	pbuf_free(pbuf);

	/*
	 * On success, make sure to return the size of the sent packet as well.
	 * As an aside: ctl_off need not be updated, as it is not returned.
	 */
	if ((r = util_convert_err(err)) == OK)
		*off = len;
	return r;
}
784 
785 /*
786  * Update the set of flag-type socket options on a raw socket.
787  */
788 static void
789 rawsock_setsockmask(struct sock * sock, unsigned int mask)
790 {
791 	struct rawsock *raw = (struct rawsock *)sock;
792 
793 	/*
794 	 * FIXME: raw sockets are not supposed to have a broardcast check, so
795 	 * perhaps just remove this and instead always set SOF_BROADCAST?
796 	 */
797 	if (mask & SO_BROADCAST)
798 		ip_set_option(raw->raw_pcb, SOF_BROADCAST);
799 	else
800 		ip_reset_option(raw->raw_pcb, SOF_BROADCAST);
801 }
802 
803 /*
804  * Prepare a helper structure for IP-level option processing.
805  */
806 static void
807 rawsock_get_ipopts(struct rawsock * raw, struct ipopts * ipopts)
808 {
809 
810 	ipopts->local_ip = &raw->raw_pcb->local_ip;
811 	ipopts->remote_ip = &raw->raw_pcb->remote_ip;
812 	ipopts->tos = &raw->raw_pcb->tos;
813 	ipopts->ttl = &raw->raw_pcb->ttl;
814 	ipopts->sndmin = RAW_SNDBUF_MIN;
815 	ipopts->sndmax = RAW_SNDBUF_MAX;
816 	ipopts->rcvmin = RAW_RCVBUF_MIN;
817 	ipopts->rcvmax = RAW_RCVBUF_MAX;
818 }
819 
/*
 * Set socket options on a raw socket.
 *
 * The options specific to raw sockets are handled right here, and only if the
 * option level matches the socket type: IPPROTO_IP options apply to IPv4
 * sockets only, IPPROTO_IPV6 options apply to IPv6 sockets only, and
 * IPPROTO_ICMPV6 options apply to IPv6 sockets created for ICMPv6 only.  Any
 * option not handled here, including an option at a level that does not match
 * the socket type, is passed on to pktsock_setsockopt().  The option value is
 * copied in from 'data', with 'len' being the length given by the caller.
 * Return OK on success, or an error code otherwise.
 */
static int
rawsock_setsockopt(struct sock * sock, int level, int name,
	const struct sockdriver_data * data, socklen_t len)
{
	struct rawsock *raw = (struct rawsock *)sock;
	struct ipopts ipopts;
	struct icmp6_filter filter;
	ip_addr_t ipaddr;
	struct in_addr in_addr;
	struct ifdev *ifdev;
	unsigned int flags;
	uint32_t ifindex;
	uint8_t byte;
	int r, val;

	/*
	 * Unfortunately, we have to duplicate most of the multicast options
	 * rather than sharing them with udpsock at the pktsock level.  The
	 * reason is that each of the PCBs have their own multicast abstraction
	 * functions and so we cannot merge the rest.  Same for getsockopt.
	 */

	switch (level) {
	case IPPROTO_IP:
		/* Leave IPv4 options on IPv6 sockets to the generic code. */
		if (rawsock_is_ipv6(raw))
			break;

		switch (name) {
		case IP_HDRINCL:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			/* Any nonzero value enables the option. */
			if (val) {
				raw_setflags(raw->raw_pcb,
				    raw_flags(raw->raw_pcb) |
				    RAW_FLAGS_HDRINCL);
			} else {
				raw_setflags(raw->raw_pcb,
				    raw_flags(raw->raw_pcb) &
				    ~RAW_FLAGS_HDRINCL);
			}

			return OK;

		case IP_MULTICAST_IF:
			pktsock_set_mcaware(&raw->raw_pktsock);

			if ((r = sockdriver_copyin_opt(data, &in_addr,
			    sizeof(in_addr), len)) != OK)
				return r;

			ip_addr_set_ip4_u32(&ipaddr, in_addr.s_addr);

			/* The interface is identified by a local address. */
			if ((ifdev = ifaddr_map_by_addr(&ipaddr)) == NULL)
				return EADDRNOTAVAIL;

			raw_set_multicast_netif_index(raw->raw_pcb,
			    ifdev_get_index(ifdev));

			return OK;

		case IP_MULTICAST_LOOP:
			pktsock_set_mcaware(&raw->raw_pktsock);

			/* For IPv4, this option takes a single byte. */
			if ((r = sockdriver_copyin_opt(data, &byte,
			    sizeof(byte), len)) != OK)
				return r;

			flags = raw_flags(raw->raw_pcb);

			if (byte)
				flags |= RAW_FLAGS_MULTICAST_LOOP;
			else
				flags &= ~RAW_FLAGS_MULTICAST_LOOP;

			raw_setflags(raw->raw_pcb, flags);

			return OK;

		case IP_MULTICAST_TTL:
			pktsock_set_mcaware(&raw->raw_pktsock);

			/* For IPv4, this option takes a single byte. */
			if ((r = sockdriver_copyin_opt(data, &byte,
			    sizeof(byte), len)) != OK)
				return r;

			raw_set_multicast_ttl(raw->raw_pcb, byte);

			return OK;
		}

		break;

	case IPPROTO_IPV6:
		/* Leave IPv6 options on IPv4 sockets to the generic code. */
		if (!rawsock_is_ipv6(raw))
			break;

		switch (name) {
		case IPV6_CHECKSUM:
			/* ICMPv6 checksums are always computed. */
			if (raw->raw_pcb->protocol == IPPROTO_ICMPV6)
				return EINVAL;

			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			/*
			 * A value of -1 disables checksumming.  Any other
			 * value is the offset of the checksum field into the
			 * packet, which must be even and not negative.
			 * NOTE(review): there is no upper bound check on the
			 * offset here -- confirm that it cannot be truncated
			 * when stored in the PCB's chksum_offset field.
			 */
			if (val == -1) {
				raw->raw_pcb->chksum_reqd = 0;

				return OK;
			} else if (val >= 0 && !(val & 1)) {
				raw->raw_pcb->chksum_reqd = 1;
				raw->raw_pcb->chksum_offset = val;

				return OK;
			} else
				return EINVAL;

		case IPV6_MULTICAST_IF:
			pktsock_set_mcaware(&raw->raw_pktsock);

			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			/* An interface index of zero clears the setting. */
			if (val != 0) {
				ifindex = (uint32_t)val;

				ifdev = ifdev_get_by_index(ifindex);

				if (ifdev == NULL)
					return ENXIO;
			} else
				ifindex = NETIF_NO_INDEX;

			raw_set_multicast_netif_index(raw->raw_pcb, ifindex);

			return OK;

		case IPV6_MULTICAST_LOOP:
			pktsock_set_mcaware(&raw->raw_pktsock);

			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			/* Unlike for IPv4, only 0 and 1 are accepted here. */
			if (val < 0 || val > 1)
				return EINVAL;

			flags = raw_flags(raw->raw_pcb);

			if (val)
				flags |= RAW_FLAGS_MULTICAST_LOOP;
			else
				flags &= ~RAW_FLAGS_MULTICAST_LOOP;

			/*
			 * lwIP's IPv6 functionality does not actually check
			 * this flag at all yet.  We set it in the hope that
			 * one day this will magically start working.
			 */
			raw_setflags(raw->raw_pcb, flags);

			return OK;

		case IPV6_MULTICAST_HOPS:
			pktsock_set_mcaware(&raw->raw_pktsock);

			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < -1 || val > UINT8_MAX)
				return EINVAL;

			/* A value of -1 selects the default, which is 1. */
			if (val == -1)
				val = 1;

			raw_set_multicast_ttl(raw->raw_pcb, val);

			return OK;
		}

		break;

	case IPPROTO_ICMPV6:
		/* These options exist on IPv6 ICMPv6 sockets only. */
		if (!rawsock_is_ipv6(raw) ||
		    raw->raw_pcb->protocol != IPPROTO_ICMPV6)
			break;

		switch (name) {
		case ICMP6_FILTER:
			/*
			 * As a special case, an option value of zero length
			 * resets the filter to let through all message types.
			 */
			if (len == 0) {
				ICMP6_FILTER_SETPASSALL(&raw->raw_icmp6filter);

				return OK;
			}

			if ((r = sockdriver_copyin_opt(data, &filter,
			    sizeof(filter), len)) != OK)
				return r;

			/*
			 * As always, never copy in the data into the actual
			 * destination, as any copy may run into a copy fault
			 * halfway through, potentially leaving the destination
			 * in a half-updated and thus corrupted state.
			 */
			memcpy(&raw->raw_icmp6filter, &filter, sizeof(filter));

			return OK;
		}
	}

	/* Anything not handled above is left to the generic packet code. */
	rawsock_get_ipopts(raw, &ipopts);

	return pktsock_setsockopt(&raw->raw_pktsock, level, name, data, len,
	    &ipopts);
}
1045 
1046 /*
1047  * Retrieve socket options on a raw socket.
1048  */
1049 static int
1050 rawsock_getsockopt(struct sock * sock, int level, int name,
1051 	const struct sockdriver_data * data, socklen_t * len)
1052 {
1053 	struct rawsock *raw = (struct rawsock *)sock;
1054 	struct ipopts ipopts;
1055 	const ip4_addr_t *ip4addr;
1056 	struct in_addr in_addr;
1057 	struct ifdev *ifdev;
1058 	unsigned int flags;
1059 	uint32_t ifindex;
1060 	uint8_t byte;
1061 	int val;
1062 
1063 	switch (level) {
1064 	case IPPROTO_IP:
1065 		if (rawsock_is_ipv6(raw))
1066 			break;
1067 
1068 		switch (name) {
1069 		case IP_HDRINCL:
1070 			val = !!rawsock_is_hdrincl(raw);
1071 
1072 			return sockdriver_copyout_opt(data, &val, sizeof(val),
1073 			    len);
1074 
1075 		case IP_MULTICAST_IF:
1076 			ifindex = raw_get_multicast_netif_index(raw->raw_pcb);
1077 
1078 			/*
1079 			 * Map back from the interface index to the IPv4
1080 			 * address assigned to the corresponding interface.
1081 			 * Should this not work out, return the 'any' address.
1082 			 */
1083 			if (ifindex != NETIF_NO_INDEX &&
1084 			   (ifdev = ifdev_get_by_index(ifindex)) != NULL) {
1085 				ip4addr =
1086 				    netif_ip4_addr(ifdev_get_netif(ifdev));
1087 
1088 				in_addr.s_addr = ip4_addr_get_u32(ip4addr);
1089 			} else
1090 				in_addr.s_addr = PP_HTONL(INADDR_ANY);
1091 
1092 			return sockdriver_copyout_opt(data, &in_addr,
1093 			    sizeof(in_addr), len);
1094 
1095 		case IP_MULTICAST_LOOP:
1096 			flags = raw_flags(raw->raw_pcb);
1097 
1098 			byte = !!(flags & RAW_FLAGS_MULTICAST_LOOP);
1099 
1100 			return sockdriver_copyout_opt(data, &byte,
1101 			    sizeof(byte), len);
1102 
1103 		case IP_MULTICAST_TTL:
1104 			byte = raw_get_multicast_ttl(raw->raw_pcb);
1105 
1106 			return sockdriver_copyout_opt(data, &byte,
1107 			    sizeof(byte), len);
1108 		}
1109 
1110 		break;
1111 
1112 	case IPPROTO_IPV6:
1113 		if (!rawsock_is_ipv6(raw))
1114 			break;
1115 
1116 		switch (name) {
1117 		case IPV6_CHECKSUM:
1118 			if (raw->raw_pcb->chksum_reqd)
1119 				val = raw->raw_pcb->chksum_offset;
1120 			else
1121 				val = -1;
1122 
1123 			return sockdriver_copyout_opt(data, &val, sizeof(val),
1124 			    len);
1125 
1126 		case IPV6_MULTICAST_IF:
1127 			ifindex = raw_get_multicast_netif_index(raw->raw_pcb);
1128 
1129 			val = (int)ifindex;
1130 
1131 			return sockdriver_copyout_opt(data, &val, sizeof(val),
1132 			    len);
1133 
1134 		case IPV6_MULTICAST_LOOP:
1135 			flags = raw_flags(raw->raw_pcb);
1136 
1137 			val = !!(flags & RAW_FLAGS_MULTICAST_LOOP);
1138 
1139 			return sockdriver_copyout_opt(data, &val, sizeof(val),
1140 			    len);
1141 
1142 		case IPV6_MULTICAST_HOPS:
1143 			val = raw_get_multicast_ttl(raw->raw_pcb);
1144 
1145 			return sockdriver_copyout_opt(data, &val, sizeof(val),
1146 			    len);
1147 		}
1148 
1149 		break;
1150 
1151 	case IPPROTO_ICMPV6:
1152 		if (!rawsock_is_ipv6(raw) ||
1153 		    raw->raw_pcb->protocol != IPPROTO_ICMPV6)
1154 			break;
1155 
1156 		switch (name) {
1157 		case ICMP6_FILTER:
1158 			return sockdriver_copyout_opt(data,
1159 			    &raw->raw_icmp6filter,
1160 			    sizeof(raw->raw_icmp6filter), len);
1161 		}
1162 
1163 		break;
1164 	}
1165 
1166 	rawsock_get_ipopts(raw, &ipopts);
1167 
1168 	return pktsock_getsockopt(&raw->raw_pktsock, level, name, data, len,
1169 	    &ipopts);
1170 }
1171 
1172 /*
1173  * Retrieve the local socket address of a raw socket.
1174  */
1175 static int
1176 rawsock_getsockname(struct sock * sock, struct sockaddr * addr,
1177 	socklen_t * addr_len)
1178 {
1179 	struct rawsock *raw = (struct rawsock *)sock;
1180 
1181 	ipsock_put_addr(rawsock_get_ipsock(raw), addr, addr_len,
1182 	    &raw->raw_pcb->local_ip, 0 /*port*/);
1183 
1184 	return OK;
1185 }
1186 
1187 /*
1188  * Retrieve the remote socket address of a raw socket.
1189  */
1190 static int
1191 rawsock_getpeername(struct sock * sock, struct sockaddr * addr,
1192 	socklen_t * addr_len)
1193 {
1194 	struct rawsock *raw = (struct rawsock *)sock;
1195 
1196 	if (!rawsock_is_conn(raw))
1197 		return ENOTCONN;
1198 
1199 	ipsock_put_addr(rawsock_get_ipsock(raw), addr, addr_len,
1200 	    &raw->raw_pcb->remote_ip, 0 /*port*/);
1201 
1202 	return OK;
1203 }
1204 
1205 /*
1206  * Shut down a raw socket for reading and/or writing.
1207  */
1208 static int
1209 rawsock_shutdown(struct sock * sock, unsigned int mask)
1210 {
1211 	struct rawsock *raw = (struct rawsock *)sock;
1212 
1213 	if (mask & SFL_SHUT_RD)
1214 		raw_recv(raw->raw_pcb, NULL, NULL);
1215 
1216 	pktsock_shutdown(&raw->raw_pktsock, mask);
1217 
1218 	return OK;
1219 }
1220 
1221 /*
1222  * Close a raw socket.
1223  */
1224 static int
1225 rawsock_close(struct sock * sock, int force __unused)
1226 {
1227 	struct rawsock *raw = (struct rawsock *)sock;
1228 
1229 	raw_recv(raw->raw_pcb, NULL, NULL);
1230 
1231 	raw_remove(raw->raw_pcb);
1232 	raw->raw_pcb = NULL;
1233 
1234 	pktsock_close(&raw->raw_pktsock);
1235 
1236 	return OK;
1237 }
1238 
1239 /*
1240  * Free up a closed raw socket.
1241  */
1242 static void
1243 rawsock_free(struct sock * sock)
1244 {
1245 	struct rawsock *raw = (struct rawsock *)sock;
1246 
1247 	assert(raw->raw_pcb == NULL);
1248 
1249 	TAILQ_REMOVE(&raw_activelist, raw, raw_next);
1250 
1251 	TAILQ_INSERT_HEAD(&raw_freelist, raw, raw_next);
1252 }
1253 
1254 /*
1255  * Fill the given kinfo_pcb sysctl(7) structure with information about the RAW
1256  * PCB identified by the given pointer.
1257  */
1258 static void
1259 rawsock_get_info(struct kinfo_pcb * ki, const void * ptr)
1260 {
1261 	const struct raw_pcb *pcb = (const struct raw_pcb *)ptr;
1262 	struct rawsock *raw;
1263 
1264 	/* We iterate our own list so we can't find "strange" PCBs. */
1265 	raw = (struct rawsock *)pcb->recv_arg;
1266 	assert(raw >= raw_array &&
1267 	    raw < &raw_array[__arraycount(raw_array)]);
1268 
1269 	ki->ki_type = SOCK_RAW;
1270 	ki->ki_protocol = pcb->protocol;
1271 
1272 	ipsock_get_info(ki, &pcb->local_ip, 0 /*local_port*/,
1273 	    &raw->raw_pcb->remote_ip, 0 /*remote_port*/);
1274 
1275 	/* TODO: change this so that sockstat(1) may work one day. */
1276 	ki->ki_sockaddr = (uint64_t)(uintptr_t)rawsock_get_sock(raw);
1277 
1278 	ki->ki_rcvq = pktsock_get_recvlen(&raw->raw_pktsock);
1279 
1280 	if (rawsock_is_hdrincl(raw))
1281 		ki->ki_pflags |= INP_HDRINCL;
1282 }
1283 
1284 /*
1285  * Given either NULL or a previously returned RAW PCB pointer, return the first
1286  * or next RAW PCB pointer, or NULL if there are no more.  lwIP does not expose
1287  * 'raw_pcbs', but other modules in this service may also use RAW PCBs (which
1288  * should then stay hidden), so we iterate through our own list instead.
1289  */
1290 static const void *
1291 rawsock_enum(const void * last)
1292 {
1293 	const struct raw_pcb *pcb;
1294 	struct rawsock *raw;
1295 
1296 	if (last != NULL) {
1297 		pcb = (const struct raw_pcb *)last;
1298 
1299 		raw = (struct rawsock *)pcb->recv_arg;
1300 		assert(raw >= raw_array &&
1301 		    raw < &raw_array[__arraycount(raw_array)]);
1302 
1303 		raw = TAILQ_NEXT(raw, raw_next);
1304 	} else
1305 		raw = TAILQ_FIRST(&raw_activelist);
1306 
1307 	if (raw != NULL)
1308 		return raw->raw_pcb;
1309 	else
1310 		return NULL;
1311 }
1312 
1313 /*
1314  * Obtain the list of RAW protocol control blocks, for sysctl(7).
1315  */
1316 static ssize_t
1317 rawsock_pcblist(struct rmib_call * call, struct rmib_node * node,
1318 	struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
1319 {
1320 
1321 	return util_pcblist(call, oldp, rawsock_enum, rawsock_get_info);
1322 }
1323 
1324 static const struct sockevent_ops rawsock_ops = {
1325 	.sop_bind		= rawsock_bind,
1326 	.sop_connect		= rawsock_connect,
1327 	.sop_pre_send		= rawsock_pre_send,
1328 	.sop_send		= rawsock_send,
1329 	.sop_pre_recv		= pktsock_pre_recv,
1330 	.sop_recv		= pktsock_recv,
1331 	.sop_test_recv		= pktsock_test_recv,
1332 	.sop_ioctl		= ifconf_ioctl,
1333 	.sop_setsockmask	= rawsock_setsockmask,
1334 	.sop_setsockopt		= rawsock_setsockopt,
1335 	.sop_getsockopt		= rawsock_getsockopt,
1336 	.sop_getsockname	= rawsock_getsockname,
1337 	.sop_getpeername	= rawsock_getpeername,
1338 	.sop_shutdown		= rawsock_shutdown,
1339 	.sop_close		= rawsock_close,
1340 	.sop_free		= rawsock_free
1341 };
1342