xref: /minix/minix/tests/test94.c (revision fb9c64b2)
1 /* Tests for BPF devices (LWIP) - by D.C. van Moolenbroek */
2 /* This test needs to be run as root: opening BPF devices is root-only. */
3 /*
4  * We do not attempt to test the BPF filter code here.  Such a test is better
5  * done through standardized tests and with direct use of the filter code.
6  * The current BPF filter implementation has been run through the FreeBSD
7  * BPF filter regression tests (from their tools/regression/bpf/bpf_filter), of
8  * which only the last test (0084 - "Check very long BPF program") failed due
9  * to our lower and strictly enforced BPF_MAXINSNS value.  Future modifications
10  * of the BPF filter code should be tested against at least that test set.
11  */
12 #include <stdlib.h>
13 #include <string.h>
14 #include <signal.h>
15 #include <sys/ioctl.h>
16 #include <sys/socket.h>
17 #include <sys/sysctl.h>
18 #include <sys/wait.h>
19 #include <net/bpf.h>
20 #include <net/bpfdesc.h>
21 #include <net/if.h>
22 #include <net/if_types.h>
23 #include <net/if_ether.h>
24 #include <net/if_dl.h>
25 #include <netinet/in.h>
26 #include <netinet/ip.h>
27 #include <netinet/ip6.h>
28 #include <netinet/udp.h>
29 #include <ifaddrs.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 #include <limits.h>
33 #include <paths.h>
34 #include <pwd.h>
35 
36 #include "common.h"
37 
38 #define ITERATIONS	2
39 
40 #define LOOPBACK_IFNAME	"lo0"
41 
42 #define TEST_PORT_A	12345
43 #define TEST_PORT_B	12346
44 
45 #define SLEEP_TIME	250000	/* (us) - increases may require code changes */
46 
47 #define NONROOT_USER	"bin"	/* name of any unprivileged user */
48 
49 #ifdef NO_INET6
50 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
51 #endif /* NO_INET6 */
52 
53 static unsigned int got_signal;
54 
55 /*
56  * Signal handler.
57  */
58 static void
59 test94_signal(int sig)
60 {
61 
62 	if (sig != SIGUSR1) e(0);
63 
64 	got_signal++;
65 }
66 
67 /*
68  * Send UDP packets on the given socket 'fd' so as to fill up a BPF store
69  * buffer of size 'size' exactly.  The provided buffer 'buf' may be used for
70  * packet generation and is at least 'size' bytes long.  Sequence numbers
71  * start at 'seq'; the sequence number for the next packet is returned.
72  */
73 static uint32_t
74 test94_fill_exact(int fd, uint8_t * buf, size_t size, uint32_t seq)
75 {
76 	size_t hdrlen, len;
77 
78 	hdrlen = BPF_WORDALIGN(sizeof(struct bpf_hdr)) + sizeof(struct ip) +
79 	    sizeof(struct udphdr) + sizeof(seq);
80 
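	/*
	 * Pick 'len' as the smallest power of two that fits one whole capture
	 * record: the aligned BPF header plus the IP and UDP headers and the
	 * sequence number.  Assuming the BPF alignment size divides 'len',
	 * each captured packet then occupies exactly 'len' bytes of the store
	 * buffer, so a power-of-two 'size' is filled exactly.  Below, 'hdrlen'
	 * is recomputed as the space taken by everything except the UDP
	 * payload, making 'len - hdrlen' the payload length to send.
	 */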
81 	for (len = 16; len <= hdrlen; len <<= 1);
82 	if (len > size) e(0);
83 
84 	hdrlen = BPF_WORDALIGN(hdrlen - sizeof(seq));
85 
86 	for (; size > 0; seq++) {
87 		memset(buf, 'Y', len - hdrlen);
88 		if (len - hdrlen > sizeof(seq))
89 			buf[sizeof(seq)] = 'X';
90 		buf[len - hdrlen - 1] = 'Z';
91 		memcpy(buf, &seq, sizeof(seq));
92 
93 		if (write(fd, buf, len - hdrlen) != len - hdrlen) e(0);
94 
95 		size -= len;
96 	}
97 
98 	return seq;
99 }
100 
101 /*
102  * Send UDP packets on the given socket 'fd' so as to fill up at least a BPF
103  * store buffer of size 'size', with at least one more packet being sent.  The
104  * provided buffer 'buf' may be used for packet generation and is at least
105  * 'size' bytes long.
106  */
107 static void
108 test94_fill_random(int fd, uint8_t * buf, size_t size)
109 {
110 	size_t hdrlen, len;
111 	ssize_t left;
112 	uint32_t seq;
113 
114 	hdrlen = BPF_WORDALIGN(BPF_WORDALIGN(sizeof(struct bpf_hdr)) +
115 	    sizeof(struct ip) + sizeof(struct udphdr));
116 
117 	/* Even if we fill the buffer exactly, we send one more packet. */
118 	for (left = (ssize_t)size, seq = 1; left >= 0; seq++) {
119 		len = hdrlen + sizeof(seq) + lrand48() % (size / 10);
120 
121 		memset(buf, 'Y', len - hdrlen);
122 		if (len - hdrlen > sizeof(seq))
123 			buf[sizeof(seq)] = 'X';
124 		buf[len - hdrlen - 1] = 'Z';
125 		memcpy(buf, &seq, sizeof(seq));
126 
127 		if (write(fd, buf, len - hdrlen) != len - hdrlen) e(0);
128 
129 		left -= BPF_WORDALIGN(len);
130 	}
131 }
132 
133 /*
134  * Send a UDP packet with a specific size of 'size' bytes and sequence number
135  * 'seq' on socket 'fd', using 'buf' as scratch buffer.
136  */
137 static void
138 test94_add_specific(int fd, uint8_t * buf, size_t size, uint32_t seq)
139 {
140 
141 	size += sizeof(seq);
142 
143 	memset(buf, 'Y', size);
144 	if (size > sizeof(seq))
145 		buf[sizeof(seq)] = 'X';
146 	buf[size - 1] = 'Z';
147 	memcpy(buf, &seq, sizeof(seq));
148 
149 	if (write(fd, buf, size) != size) e(0);
150 }
151 
152 /*
153  * Send a randomly sized, relatively small UDP packet on the given socket 'fd',
154  * using sequence number 'seq'.  The buffer 'buf' may be used as a scratch
155  * buffer and is at least 'size' bytes--the same size as the total BPF buffer.
156  */
157 static void
158 test94_add_random(int fd, uint8_t * buf, size_t size, uint32_t seq)
159 {
160 
161 	test94_add_specific(fd, buf, lrand48() % (size / 10), seq);
162 }
163 
164 /*
165  * Check whether the packet in 'buf' of 'caplen' captured bytes out of
166  * 'datalen' data bytes is one we sent.  If so, return an offset to the packet
167  * data.  If not, return a negative value.
168  */
169 static ssize_t
170 test94_check_pkt(uint8_t * buf, ssize_t caplen, ssize_t datalen)
171 {
172 	struct ip ip;
173 	struct udphdr uh;
174 
175 	if (caplen < sizeof(ip))
176 		return -1;
177 
178 	memcpy(&ip, buf, sizeof(ip));
179 
180 	if (ip.ip_v != IPVERSION)
181 		return -1;
182 	if (ip.ip_hl != sizeof(ip) >> 2)
183 		return -1;
184 	if (ip.ip_p != IPPROTO_UDP)
185 		return -1;
186 
187 	if (caplen - sizeof(ip) < sizeof(uh))
188 		return -1;
189 
190 	memcpy(&uh, buf + sizeof(ip), sizeof(uh));
191 
192 	if (uh.uh_sport != htons(TEST_PORT_A))
193 		return -1;
194 	if (uh.uh_dport != htons(TEST_PORT_B))
195 		return -1;
196 
197 	if (datalen - sizeof(ip) != ntohs(uh.uh_ulen)) e(0);
198 
199 	return sizeof(ip) + sizeof(uh);
200 }
201 
202 /*
203  * Check whether the capture in 'buf' of 'len' bytes looks like a valid set of
204  * captured packets.  The valid packets start from sequence number 'seq'; the
205  * next expected sequence number is returned.  If 'filtered' is set, there
206  * should be no other packets in the capture; otherwise, other packets are
207  * ignored.
208  */
209 static uint32_t
210 test94_check(uint8_t * buf, ssize_t len, uint32_t seq, int filtered,
211 	uint32_t * caplen, uint32_t * datalen)
212 {
213 	struct bpf_hdr bh;
214 	ssize_t off;
215 	uint32_t nseq;
216 
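	/*
	 * Each capture record consists of a BPF header, the captured packet
	 * bytes, and padding up to the next BPF_WORDALIGN boundary; we walk
	 * the buffer one such record at a time.
	 */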
217 	while (len > 0) {
218 		/*
219 		 * We rely on the assumption that the last packet in the buffer
220 		 * is padded to alignment as well; if not, this check fails.
221 		 */
222 		if (len < BPF_WORDALIGN(sizeof(bh))) e(0);
223 
224 		memcpy(&bh, buf, sizeof(bh));
225 
226 		/*
227 		 * The timestamp fields should be filled in.  The tests that
228 		 * use this function do not set a capture length below the
229 		 * packet length.  The header must be exactly as large as we
230 		 * expect: no small-size tricks (as NetBSD uses) and no
231 		 * unexpected extra padding.
232 		 */
233 		if (bh.bh_tstamp.tv_sec == 0 && bh.bh_tstamp.tv_usec == 0)
234 			e(0);
235 		if (caplen != NULL) {
236 			if (bh.bh_caplen != *caplen) e(0);
237 			if (bh.bh_datalen != *datalen) e(0);
238 
239 			caplen++;
240 			datalen++;
241 		} else
242 			if (bh.bh_datalen != bh.bh_caplen) e(0);
243 		if (bh.bh_hdrlen != BPF_WORDALIGN(sizeof(bh))) e(0);
244 
245 		if (bh.bh_hdrlen + BPF_WORDALIGN(bh.bh_caplen) > len) e(0);
246 
247 		buf += bh.bh_hdrlen;
248 		len -= bh.bh_hdrlen;
249 
250 		if ((off = test94_check_pkt(buf, bh.bh_caplen,
251 		    bh.bh_datalen)) < 0) {
252 			if (filtered) e(0);
253 
254 			buf += BPF_WORDALIGN(bh.bh_caplen);
255 			len -= BPF_WORDALIGN(bh.bh_caplen);
256 
257 			continue;
258 		}
259 
260 		if (bh.bh_caplen < off + sizeof(seq)) e(0);
261 
262 		memcpy(&nseq, &buf[off], sizeof(nseq));
263 
264 		if (nseq != seq++) e(0);
265 
266 		off += sizeof(seq);
267 		if (off < bh.bh_caplen) {
268 			/* If there is just one byte, it is 'Z'. */
269 			if (off < bh.bh_caplen && off < bh.bh_datalen - 1) {
270 				if (buf[off] != 'X') e(0);
271 
272 				for (off++; off < bh.bh_caplen &&
273 				    off < bh.bh_datalen - 1; off++)
274 					if (buf[off] != 'Y') e(0);
275 			}
276 			if (off < bh.bh_caplen && off == bh.bh_datalen - 1 &&
277 			    buf[off] != 'Z') e(0);
278 		}
279 
280 		buf += BPF_WORDALIGN(bh.bh_caplen);
281 		len -= BPF_WORDALIGN(bh.bh_caplen);
282 	}
283 
284 	return seq;
285 }
286 
287 /*
288  * Filter program to ensure that the given (datalink-headerless) packet is an
289  * IPv4 UDP packet from port 12345 to port 12346.  Important: the 'k' value of
290  * the last instruction must be the accepted packet size, and is modified by
291  * some of the tests further down!
292  */
293 static struct bpf_insn test94_filter[] = {
294 	{ BPF_LD+BPF_B+BPF_ABS, 0, 0, 0 },	/* is this an IPv4 header? */
295 	{ BPF_ALU+BPF_RSH+BPF_K, 0, 0, 4 },
296 	{ BPF_JMP+BPF_JEQ+BPF_K, 0, 7, 4 },
297 	{ BPF_LD+BPF_B+BPF_ABS, 0, 0, 9 },	/* is this a UDP packet? */
298 	{ BPF_JMP+BPF_JEQ+BPF_K, 0, 5, IPPROTO_UDP },
299 	{ BPF_LDX+BPF_B+BPF_MSH, 0, 0, 0 },
300 	{ BPF_LD+BPF_H+BPF_IND, 0, 0, 0 },	/* source port 12345? */
301 	{ BPF_JMP+BPF_JEQ+BPF_K, 0, 2, TEST_PORT_A },
302 	{ BPF_LD+BPF_H+BPF_IND, 0, 0, 2 },	/* destination port 12346? */
303 	{ BPF_JMP+BPF_JEQ+BPF_K, 1, 0, TEST_PORT_B },
304 	{ BPF_RET+BPF_K, 0, 0, 0 },		/* reject the packet */
305 	{ BPF_RET+BPF_K, 0, 0, (uint32_t)-1 },	/* accept the (whole) packet */
306 };
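
/*
 * For reference, the program above roughly corresponds to this tcpdump-style
 * disassembly, with jump targets resolved to absolute instruction indices:
 *
 *	(000) ldb  [0]			; load IP version/IHL byte
 *	(001) rsh  #4			; A = IP version
 *	(002) jeq  #4		jt 3	jf 10
 *	(003) ldb  [9]			; load IP protocol
 *	(004) jeq  #17		jt 5	jf 10	; 17 = IPPROTO_UDP
 *	(005) ldxb 4*([0]&0xf)		; X = IP header length
 *	(006) ldh  [x + 0]		; UDP source port
 *	(007) jeq  #12345	jt 8	jf 10
 *	(008) ldh  [x + 2]		; UDP destination port
 *	(009) jeq  #12346	jt 11	jf 10
 *	(010) ret  #0			; reject
 *	(011) ret  #-1			; accept the whole packet
 */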
307 
308 /*
309  * Set up a BPF device, a pair of sockets whose traffic will be captured on
310  * the BPF device, a buffer for capturing packets, and optionally a filter.
311  * If the given size is non-zero, use that as buffer size.  Return the BPF
312  * device's actual buffer size, which is also the size of 'buf'.
313  */
314 static size_t
315 test94_setup(int * fd, int * fd2, int * fd3, uint8_t ** buf, unsigned int size,
316 	int set_filter)
317 {
318 	struct sockaddr_in sinA, sinB;
319 	struct ifreq ifr;
320 	struct bpf_program bf;
321 	unsigned int dlt;
322 
323 	if ((*fd = open(_PATH_BPF, O_RDWR)) < 0) e(0);
324 
325 	if (size != 0 && ioctl(*fd, BIOCSBLEN, &size) != 0) e(0);
326 
327 	if (ioctl(*fd, BIOCGBLEN, &size) != 0) e(0);
328 	if (size < 1024 || size > BPF_MAXBUFSIZE) e(0);
329 
330 	if ((*buf = malloc(size)) == NULL) e(0);
331 
332 	if (set_filter) {
333 		/*
334 		 * Install a filter to improve predictability for the tests.
335 		 */
336 		memset(&bf, 0, sizeof(bf));
337 		bf.bf_len = __arraycount(test94_filter);
338 		bf.bf_insns = test94_filter;
339 		if (ioctl(*fd, BIOCSETF, &bf) != 0) e(0);
340 	}
341 
342 	/* Bind to the loopback device. */
343 	memset(&ifr, 0, sizeof(ifr));
344 	strlcpy(ifr.ifr_name, LOOPBACK_IFNAME, sizeof(ifr.ifr_name));
345 	if (ioctl(*fd, BIOCSETIF, &ifr) != 0) e(0);
346 
347 	/*
348 	 * If the loopback device's data link type is not DLT_RAW, our filter
349 	 * and size calculations will not work.
350 	 */
351 	if (ioctl(*fd, BIOCGDLT, &dlt) != 0) e(0);
352 	if (dlt != DLT_RAW) e(0);
353 
354 	/* We use UDP traffic for our test packets. */
355 	if ((*fd2 = socket(AF_INET, SOCK_DGRAM, 0)) < 0) e(0);
356 
357 	memset(&sinA, 0, sizeof(sinA));
358 	sinA.sin_family = AF_INET;
359 	sinA.sin_port = htons(TEST_PORT_A);
360 	sinA.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
361 	if (bind(*fd2, (struct sockaddr *)&sinA, sizeof(sinA)) != 0) e(0);
362 
363 	memcpy(&sinB, &sinA, sizeof(sinB));
364 	sinB.sin_port = htons(TEST_PORT_B);
365 	if (connect(*fd2, (struct sockaddr *)&sinB, sizeof(sinB)) != 0) e(0);
366 
367 	if ((*fd3 = socket(AF_INET, SOCK_DGRAM, 0)) < 0) e(0);
368 
369 	if (bind(*fd3, (struct sockaddr *)&sinB, sizeof(sinB)) != 0) e(0);
370 
371 	if (connect(*fd3, (struct sockaddr *)&sinA, sizeof(sinA)) != 0) e(0);
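
	/*
	 * Note that fd2 sends from port A to port B, which matches
	 * test94_filter, while fd3 sends from port B to port A, which the
	 * filter rejects; test94_comm() relies on this asymmetry.
	 */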
372 
373 	return size;
374 }
375 
376 /*
377  * Clean up resources allocated by test94_setup().
378  */
379 static void
380 test94_cleanup(int fd, int fd2, int fd3, uint8_t * buf)
381 {
382 
383 	if (close(fd3) != 0) e(0);
384 
385 	if (close(fd2) != 0) e(0);
386 
387 	free(buf);
388 
389 	if (close(fd) != 0) e(0);
390 }
391 
392 /*
393  * Test reading packets from a BPF device, using regular mode.
394  */
395 static void
396 test94a(void)
397 {
398 	struct bpf_program bf;
399 	struct timeval tv;
400 	fd_set fds;
401 	uint8_t *buf;
402 	pid_t pid;
403 	size_t size;
404 	ssize_t len;
405 	uint32_t seq;
406 	int fd, fd2, fd3, status, bytes, fl;
407 
408 	subtest = 1;
409 
410 	size = test94_setup(&fd, &fd2, &fd3, &buf, 0 /*size*/,
411 	    0 /*set_filter*/);
412 
413 	/*
414 	 * Test that a filled-up store buffer will be returned to a pending
415 	 * read call.  Perform this first test without a filter, to ensure that
416 	 * the default behavior is to accept all packets.  The side effect is
417 	 * that we may receive other loopback traffic as part of our capture.
418 	 */
419 	pid = fork();
420 	switch (pid) {
421 	case 0:
422 		errct = 0;
423 
424 		usleep(SLEEP_TIME);
425 
426 		test94_fill_random(fd2, buf, size);
427 
428 		exit(errct);
429 	case -1:
430 		e(0);
431 
432 		break;
433 	default:
434 		break;
435 	}
436 
437 	len = read(fd, buf, size);
438 
439 	if (len < size * 3/4) e(0);
440 	if (len > size) e(0);
441 	test94_check(buf, len, 1 /*seq*/, 0 /*filtered*/, NULL /*caplen*/,
442 	    NULL /*datalen*/);
443 
444 	if (wait(&status) != pid) e(0);
445 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
446 
447 	/* Only the exact buffer size may be used in read calls. */
448 	if (read(fd, buf, size - 1) != -1) e(0);
449 	if (errno != EINVAL) e(0);
450 	if (read(fd, buf, size + 1) != -1) e(0);
451 	if (errno != EINVAL) e(0);
452 	if (read(fd, buf, sizeof(struct bpf_hdr)) != -1) e(0);
453 	if (errno != EINVAL) e(0);
454 
455 	/*
456 	 * Install a filter to improve predictability for the remaining tests.
457 	 */
458 	memset(&bf, 0, sizeof(bf));
459 	bf.bf_len = __arraycount(test94_filter);
460 	bf.bf_insns = test94_filter;
461 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
462 
463 	/*
464 	 * Next we want to test that an already filled-up buffer will be
465 	 * returned to a read call immediately.  We take the opportunity to
466 	 * test that filling the buffer will also wake up a blocked select
467 	 * call.  In addition, we test ioctl(FIONREAD).
468 	 */
469 	tv.tv_sec = 0;
470 	tv.tv_usec = 0;
471 	FD_ZERO(&fds);
472 	FD_SET(fd, &fds);
473 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 0) e(0);
474 	if (FD_ISSET(fd, &fds)) e(0);
475 
476 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
477 	if (bytes != 0) e(0);
478 
479 	pid = fork();
480 	switch (pid) {
481 	case 0:
482 		errct = 0;
483 
484 		usleep(SLEEP_TIME);
485 
486 		test94_fill_random(fd2, buf, size);
487 
488 		exit(errct);
489 	case -1:
490 		e(0);
491 
492 		break;
493 	default:
494 		break;
495 	}
496 
497 	FD_ZERO(&fds);
498 	FD_SET(fd, &fds);
499 	if (select(fd + 1, &fds, NULL, NULL, NULL) != 1) e(0);
500 	if (!FD_ISSET(fd, &fds)) e(0);
501 
502 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
503 
504 	if (select(fd + 1, &fds, NULL, NULL, NULL) != 1) e(0);
505 	if (!FD_ISSET(fd, &fds)) e(0);
506 
507 	len = read(fd, buf, size);
508 
509 	if (len < size * 3/4) e(0);
510 	if (len > size) e(0);
511 	seq = test94_check(buf, len, 1 /*seq*/, 1 /*filtered*/,
512 	    NULL /*caplen*/, NULL /*datalen*/);
513 
514 	if (len != bytes) e(0);
515 
516 	if (wait(&status) != pid) e(0);
517 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
518 
519 	/* There is one more packet in the store buffer at this point. */
520 	tv.tv_sec = 0;
521 	tv.tv_usec = 0;
522 	FD_ZERO(&fds);
523 	FD_SET(fd, &fds);
524 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 0) e(0);
525 	if (FD_ISSET(fd, &fds)) e(0);
526 
527 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
528 	if (bytes != 0) e(0);
529 
530 	/*
531 	 * Next, we test whether read timeouts work, first checking that a
532 	 * timed-out read call returns any packets currently in the buffer.
533 	 * We use sleep and a signal as a crude way to test that the call was
534 	 * actually blocked until the timeout occurred.
535 	 */
536 	got_signal = 0;
537 
538 	pid = fork();
539 	switch (pid) {
540 	case 0:
541 		errct = 0;
542 
543 		signal(SIGUSR1, test94_signal);
544 
545 		usleep(SLEEP_TIME);
546 
547 		test94_add_random(fd2, buf, size, seq + 1);
548 
549 		usleep(SLEEP_TIME);
550 
551 		if (got_signal != 0) e(0);
552 		pause();
553 		if (got_signal != 1) e(0);
554 
555 		exit(errct);
556 	case -1:
557 		e(0);
558 
559 		break;
560 	default:
561 		break;
562 	}
563 
564 	tv.tv_sec = 0;
565 	tv.tv_usec = SLEEP_TIME * 3;
566 	if (ioctl(fd, BIOCSRTIMEOUT, &tv) != 0) e(0);
567 
568 	len = read(fd, buf, size);
569 	if (len <= 0) e(0);
570 	if (len >= size * 3/4) e(0);	/* two packets < 3/4 of the size */
571 	if (test94_check(buf, len, seq, 1 /*filtered*/, NULL /*caplen*/,
572 	    NULL /*datalen*/) != seq + 2) e(0);
573 
574 	if (kill(pid, SIGUSR1) != 0) e(0);
575 
576 	if (wait(&status) != pid) e(0);
577 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
578 
579 	/*
580 	 * Next, see if a timed-out read with all buffers empty yields EAGAIN.
581 	 */
582 	tv.tv_sec = 0;
583 	tv.tv_usec = SLEEP_TIME;
584 	if (ioctl(fd, BIOCSRTIMEOUT, &tv) != 0) e(0);
585 
586 	if (read(fd, buf, size) != -1) e(0);
587 	if (errno != EAGAIN) e(0);
588 
589 	/*
590 	 * Verify that resetting the timeout to zero makes the call block
591 	 * forever (for short test values of "forever" anyway), because
592 	 * otherwise this may create a false illusion of correctness in the
593 	 * next test, for non-blocking calls.  As a side effect, this tests
594 	 * read call signal interruption, and ensures no partial results are
595 	 * returned in that case.
596 	 */
597 	tv.tv_sec = 0;
598 	tv.tv_usec = 0;
599 	if (ioctl(fd, BIOCSRTIMEOUT, &tv) != 0) e(0);
600 
601 	pid = fork();
602 	switch (pid) {
603 	case 0:
604 		errct = 0;
605 
606 		signal(SIGUSR1, test94_signal);
607 
608 		if (read(fd, buf, size) != -1) e(0);
609 		if (errno != EINTR) e(0);
610 
611 		if (got_signal != 1) e(0);
612 
613 		exit(errct);
614 	case -1:
615 		e(0);
616 
617 		break;
618 	default:
619 		break;
620 	}
621 
622 	usleep(SLEEP_TIME * 2);
623 
624 	if (kill(pid, SIGUSR1) != 0) e(0);
625 
626 	if (wait(&status) != pid) e(0);
627 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
628 
629 	/*
630 	 * Repeat the same test with a non-full, non-empty buffer, to ensure
631 	 * that interrupted reads do not return partial results.
632 	 */
633 	pid = fork();
634 	switch (pid) {
635 	case 0:
636 		errct = 0;
637 
638 		signal(SIGUSR1, test94_signal);
639 
640 		if (read(fd, buf, size) != -1) e(0);
641 		if (errno != EINTR) e(0);
642 
643 		if (got_signal != 1) e(0);
644 
645 		exit(errct);
646 	case -1:
647 		e(0);
648 
649 		break;
650 	default:
651 		break;
652 	}
653 
654 	usleep(SLEEP_TIME);
655 
656 	test94_add_random(fd2, buf, size, 2);
657 
658 	usleep(SLEEP_TIME);
659 
660 	if (kill(pid, SIGUSR1) != 0) e(0);
661 
662 	if (wait(&status) != pid) e(0);
663 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
664 
665 	/*
666 	 * Test non-blocking reads with empty, full, and non-empty buffers.
667 	 * Against common sense, the last case should return whatever is in
668 	 * the buffer rather than EAGAIN, like immediate-mode reads would.
669 	 */
670 	if ((fl = fcntl(fd, F_GETFL)) == -1) e(0);
671 	if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) != 0) e(0);
672 
673 	len = read(fd, buf, size);
674 	if (len <= 0) e(0);
675 	if (len >= size * 3/4) e(0);	/* one packet < 3/4 of the size */
676 	seq = test94_check(buf, len, 2 /*seq*/, 1 /*filtered*/,
677 	    NULL /*caplen*/, NULL /*datalen*/);
678 
679 	if (read(fd, buf, size) != -1) e(0);
680 	if (errno != EAGAIN) e(0);
681 
682 	test94_fill_random(fd2, buf, size);
683 
684 	len = read(fd, buf, size);
685 	if (len < size * 3/4) e(0);
686 	if (len > size) e(0);
687 	seq = test94_check(buf, len, 1 /*seq*/, 1 /*filtered*/,
688 	    NULL /*caplen*/, NULL /*datalen*/);
689 
690 	len = read(fd, buf, size);
691 
692 	if (len <= 0) e(0);
693 	if (len >= size * 3/4) e(0);	/* one packet < 3/4 of the size */
694 	if (test94_check(buf, len, seq, 1 /*filtered*/, NULL /*caplen*/,
695 	    NULL /*datalen*/) != seq + 1) e(0);
696 
697 	if (fcntl(fd, F_SETFL, fl) != 0) e(0);
698 
699 	/*
700 	 * Test two remaining aspects of select(2): single-packet arrivals do
701 	 * not cause a wake-up, and the read timer has no effect.  The latter
702 	 * is a deliberate implementation choice where we diverge from NetBSD,
703 	 * because it requires keeping state in a way that violates the
704 	 * principle of system call independence.
705 	 */
706 	tv.tv_sec = 0;
707 	tv.tv_usec = SLEEP_TIME * 2;
708 	if (ioctl(fd, BIOCSRTIMEOUT, &tv) != 0) e(0);
709 
710 	pid = fork();
711 	switch (pid) {
712 	case 0:
713 		errct = 0;
714 
715 		usleep(SLEEP_TIME);
716 
717 		test94_add_random(fd2, buf, size, 1);
718 
719 		exit(errct);
720 	case -1:
721 		e(0);
722 
723 		break;
724 	default:
725 		break;
726 	}
727 
728 	tv.tv_sec = 1;
729 	tv.tv_usec = 0;
730 	FD_ZERO(&fds);
731 	FD_SET(fd, &fds);
732 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 0) e(0);
733 
734 	if (wait(&status) != pid) e(0);
735 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
736 
737 	test94_cleanup(fd, fd2, fd3, buf);
738 }
739 
740 /*
741  * Test reading packets from a BPF device, using immediate mode.
742  */
743 static void
744 test94b(void)
745 {
746 	struct timeval tv;
747 	fd_set fds;
748 	uint8_t *buf;
749 	unsigned int val;
750 	size_t size;
751 	ssize_t len;
752 	uint32_t seq;
753 	pid_t pid;
754 	int fd, fd2, fd3, bytes, status, fl;
755 
756 	subtest = 2;
757 
758 	size = test94_setup(&fd, &fd2, &fd3, &buf, 0 /*size*/,
759 	    1 /*set_filter*/);
760 
761 	val = 1;
762 	if (ioctl(fd, BIOCIMMEDIATE, &val) != 0) e(0);
763 
764 	tv.tv_sec = 0;
765 	tv.tv_usec = 0;
766 	FD_ZERO(&fds);
767 	FD_SET(fd, &fds);
768 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 0) e(0);
769 
770 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
771 	if (bytes != 0) e(0);
772 
773 	/*
774 	 * Ensure that if the hold buffer is full, an immediate-mode read
775 	 * returns the content of the hold buffer, even if the store buffer is
776 	 * not empty.
777 	 */
778 	test94_fill_random(fd2, buf, size);
779 
780 	FD_ZERO(&fds);
781 	FD_SET(fd, &fds);
782 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 1) e(0);
783 	if (!FD_ISSET(fd, &fds)) e(0);
784 
785 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
786 
787 	len = read(fd, buf, size);
788 	if (len < size * 3/4) e(0);
789 	if (len > size) e(0);
790 	seq = test94_check(buf, len, 1 /*seq*/, 1 /*filtered*/,
791 	    NULL /*caplen*/, NULL /*datalen*/);
792 
793 	if (len != bytes) e(0);
794 
795 	/*
796 	 * There is one packet left in the buffer.  In immediate mode, this
797 	 * packet should be returned immediately.
798 	 */
799 	FD_ZERO(&fds);
800 	FD_SET(fd, &fds);
801 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 1) e(0);
802 	if (!FD_ISSET(fd, &fds)) e(0);
803 
804 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
805 
806 	len = read(fd, buf, size);
807 	if (len <= 0) e(0);
808 	if (len >= size * 3/4) e(0);	/* one packet < 3/4 of the size */
809 	if (test94_check(buf, len, seq, 1 /*filtered*/, NULL /*caplen*/,
810 	    NULL /*datalen*/) != seq + 1) e(0);
811 
812 	if (len != bytes) e(0);
813 
814 	/* The buffer is now empty again. */
815 	FD_ZERO(&fds);
816 	FD_SET(fd, &fds);
817 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 0) e(0);
818 
819 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
820 	if (bytes != 0) e(0);
821 
822 	/*
823 	 * Immediate-mode reads may return multiple packets from the store
824 	 * buffer.
825 	 */
826 	test94_add_random(fd2, buf, size, seq + 1);
827 	test94_add_random(fd2, buf, size, seq + 2);
828 
829 	FD_ZERO(&fds);
830 	FD_SET(fd, &fds);
831 	if (select(fd + 1, &fds, NULL, NULL, &tv) != 1) e(0);
832 	if (!FD_ISSET(fd, &fds)) e(0);
833 
834 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
835 
836 	len = read(fd, buf, size);
837 	if (len <= 0) e(0);
838 	if (len >= size * 3/4) e(0);	/* two packets < 3/4 of the size */
839 	if (test94_check(buf, len, seq + 1, 1 /*filtered*/, NULL /*caplen*/,
840 	    NULL /*datalen*/) != seq + 3) e(0);
841 
842 	if (len != bytes) e(0);
843 
844 	/*
845 	 * Now test waking up suspended calls, read(2) first.
846 	 */
847 	pid = fork();
848 	switch (pid) {
849 	case 0:
850 		errct = 0;
851 
852 		usleep(SLEEP_TIME);
853 
854 		test94_add_random(fd2, buf, size, seq + 3);
855 
856 		exit(errct);
857 	case -1:
858 		e(0);
859 
860 		break;
861 	default:
862 		break;
863 	}
864 
865 	len = read(fd, buf, size);
866 	if (len <= 0) e(0);
867 	if (len >= size * 3/4) e(0);	/* one packet < 3/4 of the size */
868 	if (test94_check(buf, len, seq + 3, 1 /*filtered*/, NULL /*caplen*/,
869 	    NULL /*datalen*/) != seq + 4) e(0);
870 
871 	if (wait(&status) != pid) e(0);
872 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
873 
874 	/*
875 	 * Then select(2).
876 	 */
877 	pid = fork();
878 	switch (pid) {
879 	case 0:
880 		errct = 0;
881 
882 		usleep(SLEEP_TIME);
883 
884 		test94_add_random(fd2, buf, size, seq + 4);
885 
886 		exit(errct);
887 	case -1:
888 		e(0);
889 
890 		break;
891 	default:
892 		break;
893 	}
894 
895 	FD_ZERO(&fds);
896 	FD_SET(fd, &fds);
897 	if (select(fd + 1, &fds, NULL, NULL, NULL) != 1) e(0);
898 	if (!FD_ISSET(fd, &fds)) e(0);
899 
900 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
901 
902 	if (select(fd + 1, &fds, NULL, NULL, NULL) != 1) e(0);
903 	if (!FD_ISSET(fd, &fds)) e(0);
904 
905 	len = read(fd, buf, size);
906 	if (len <= 0) e(0);
907 	if (len >= size * 3/4) e(0);	/* one packet < 3/4 of the size */
908 	if (test94_check(buf, len, seq + 4, 1 /*filtered*/, NULL /*caplen*/,
909 	    NULL /*datalen*/) != seq + 5) e(0);
910 
911 	if (len != bytes) e(0);
912 
913 	if (wait(&status) != pid) e(0);
914 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
915 
916 	/*
917 	 * Non-blocking reads should behave just as with regular mode.
918 	 */
919 	if ((fl = fcntl(fd, F_GETFL)) == -1) e(0);
920 	if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) != 0) e(0);
921 
922 	if (read(fd, buf, size) != -1) e(0);
923 	if (errno != EAGAIN) e(0);
924 
925 	test94_fill_random(fd2, buf, size);
926 
927 	len = read(fd, buf, size);
928 	if (len < size * 3/4) e(0);
929 	if (len > size) e(0);
930 	seq = test94_check(buf, len, 1 /*seq*/, 1 /*filtered*/,
931 	    NULL /*caplen*/, NULL /*datalen*/);
932 
933 	len = read(fd, buf, size);
934 	if (len <= 0) e(0);
935 	if (len >= size * 3/4) e(0);	/* one packet < 3/4 of the size */
936 	if (test94_check(buf, len, seq, 1 /*filtered*/, NULL /*caplen*/,
937 	    NULL /*datalen*/) != seq + 1) e(0);
938 
939 	if (fcntl(fd, F_SETFL, fl) != 0) e(0);
940 
941 	/*
942 	 * Timeouts should work with immediate mode.
943 	 */
944 	tv.tv_sec = 0;
945 	tv.tv_usec = SLEEP_TIME;
946 	if (ioctl(fd, BIOCSRTIMEOUT, &tv) != 0) e(0);
947 
948 	if (read(fd, buf, size) != -1) e(0);
949 	if (errno != EAGAIN) e(0);
950 
951 	test94_cleanup(fd, fd2, fd3, buf);
952 }
953 
954 /*
955  * Test reading packets from a BPF device, with an exactly filled buffer.  The
956  * idea is that normally the store buffer is considered "full" if the next
957  * packet does not fit in it, but if no more bytes are left in it, it can be
958  * rotated immediately.  This is a practically useless edge case, but we
959  * support it, so we might as well test it.  Also, some of the code for this
960  * case is shared with other rare cases that we cannot test here (interfaces
961  * disappearing, to be specific), and exactly filling up the buffers does test
962  * some other bounds checks, so all that might make this worth it anyway.  While
963  * we are exercising full control over our buffers, also check statistics.
964  */
965 static void
966 test94c(void)
967 {
968 	struct bpf_stat bs;
969 	fd_set fds;
970 	uint8_t *buf;
971 	size_t size;
972 	pid_t pid;
973 	uint32_t count, seq;
974 	int fd, fd2, fd3, bytes, status, fl;
975 
976 	subtest = 3;
977 
978 	size = test94_setup(&fd, &fd2, &fd3, &buf, 0 /*size*/,
979 	    1 /*set_filter*/);
980 
981 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
982 	if (bs.bs_capt != 0) e(0);
983 	if (bs.bs_drop != 0) e(0);
984 
985 	/*
986 	 * Test read, select, and ioctl(FIONREAD) on an exactly filled buffer.
987 	 */
988 	count = test94_fill_exact(fd2, buf, size, 0);
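	/*
	 * Since the fill starts at sequence number zero, the returned next
	 * sequence number equals the number of packets in one full buffer.
	 */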
989 
990 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
991 	if (bs.bs_capt != count) e(0);
992 	if (bs.bs_recv < bs.bs_capt) e(0); /* may be more */
993 	if (bs.bs_drop != 0) e(0);
994 
995 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
996 	if (bytes != size) e(0);
997 
998 	FD_ZERO(&fds);
999 	FD_SET(fd, &fds);
1000 	if (select(fd + 1, &fds, NULL, NULL, NULL) != 1) e(0);
1001 	if (!FD_ISSET(fd, &fds)) e(0);
1002 
1003 	if (read(fd, buf, size) != size) e(0);
1004 	test94_check(buf, size, 0 /*seq*/, 1 /*filtered*/, NULL /*caplen*/,
1005 	    NULL /*datalen*/);
1006 
1007 	/*
1008 	 * If the store buffer is full, the buffers should be swapped after
1009 	 * emptying the hold buffer.
1010 	 */
1011 	seq = test94_fill_exact(fd2, buf, size, 1);
1012 	test94_fill_exact(fd2, buf, size, seq);
1013 
1014 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1015 	if (bs.bs_capt != count * 3) e(0);
1016 	if (bs.bs_recv < bs.bs_capt) e(0); /* may be more */
1017 	if (bs.bs_drop != 0) e(0);
1018 
1019 	test94_add_random(fd2, buf, size, 0); /* this one will get dropped */
1020 
1021 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1022 	if (bs.bs_capt != count * 3 + 1) e(0);
1023 	if (bs.bs_recv < bs.bs_capt) e(0); /* may be more */
1024 	if (bs.bs_drop != 1) e(0);
1025 
1026 	test94_add_random(fd2, buf, size, 0); /* this one will get dropped */
1027 
1028 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1029 	if (bs.bs_capt != count * 3 + 2) e(0);
1030 	if (bs.bs_recv < bs.bs_capt) e(0); /* may be more */
1031 	if (bs.bs_drop != 2) e(0);
1032 
1033 	if (ioctl(fd, FIONREAD, &bytes) != 0) e(0);
1034 	if (bytes != size) e(0);
1035 
1036 	if (read(fd, buf, size) != size) e(0);
1037 	if (test94_check(buf, size, 1 /*seq*/, 1 /*filtered*/, NULL /*caplen*/,
1038 	    NULL /*datalen*/) != seq) e(0);
1039 
1040 	if (read(fd, buf, size) != size) e(0);
1041 	if (test94_check(buf, size, seq, 1 /*filtered*/, NULL /*caplen*/,
1042 	    NULL /*datalen*/) != count * 2 + 1) e(0);
1043 
1044 	/*
1045 	 * See if an exactly filled buffer resumes reads...
1046 	 */
1047 	pid = fork();
1048 	switch (pid) {
1049 	case 0:
1050 		errct = 0;
1051 
1052 		usleep(SLEEP_TIME);
1053 
1054 		test94_fill_exact(fd2, buf, size, 1);
1055 
1056 		exit(errct);
1057 	case -1:
1058 		e(0);
1059 
1060 		break;
1061 	default:
1062 		break;
1063 	}
1064 
1065 	if (read(fd, buf, size) != size) e(0);
1066 	test94_check(buf, size, 1 /*seq*/, 1 /*filtered*/, NULL /*caplen*/,
1067 	    NULL /*datalen*/);
1068 
1069 	if (wait(&status) != pid) e(0);
1070 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
1071 
1072 	/*
1073 	 * ...and selects.
1074 	 */
1075 	pid = fork();
1076 	switch (pid) {
1077 	case 0:
1078 		errct = 0;
1079 
1080 		usleep(SLEEP_TIME);
1081 
1082 		test94_fill_exact(fd2, buf, size, seq);
1083 
1084 		exit(errct);
1085 	case -1:
1086 		e(0);
1087 
1088 		break;
1089 	default:
1090 		break;
1091 	}
1092 
1093 	FD_ZERO(&fds);
1094 	FD_SET(fd, &fds);
1095 	if (select(fd + 1, &fds, NULL, NULL, NULL) != 1) e(0);
1096 	if (!FD_ISSET(fd, &fds)) e(0);
1097 
1098 	if ((fl = fcntl(fd, F_GETFL)) == -1) e(0);
1099 	if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) != 0) e(0);
1100 
1101 	if (read(fd, buf, size) != size) e(0);
1102 	test94_check(buf, size, seq, 1 /*filtered*/, NULL /*caplen*/,
1103 	    NULL /*datalen*/);
1104 
1105 	if (read(fd, buf, size) != -1) e(0);
1106 	if (errno != EAGAIN) e(0);
1107 
1108 	if (wait(&status) != pid) e(0);
1109 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
1110 
1111 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1112 	if (bs.bs_capt != count * 5 + 2) e(0);
1113 	if (bs.bs_recv < bs.bs_capt) e(0); /* may be more */
1114 	if (bs.bs_drop != 2) e(0);
1115 
1116 	test94_cleanup(fd, fd2, fd3, buf);
1117 }
1118 
1119 /*
1120  * Test receipt of large packets on BPF devices.  Large packets should be
1121  * truncated to the size of the buffer, but unless the filter specifies a
1122  * smaller capture size, no more than that.
1123  */
1124 static void
1125 test94d(void)
1126 {
1127 	struct bpf_hdr bh;
1128 	uint8_t *buf, *buf2;
1129 	size_t size;
1130 	ssize_t len;
1131 	int fd, fd2, fd3, datalen;
1132 
1133 	subtest = 4;
1134 
1135 	/*
1136 	 * Specify a size smaller than the largest packet we can send on the
1137 	 * loopback device.  The size we specify here is currently the default
1138 	 * size already anyway, but that might change in the future.
1139 	 */
1140 	size = test94_setup(&fd, &fd2, &fd3, &buf, 32768 /*size*/,
1141 	    1 /*set_filter*/);
1142 	if (size != 32768) e(0);
1143 
1144 	datalen = 65000;
1145 	if (setsockopt(fd2, SOL_SOCKET, SO_SNDBUF, &datalen,
1146 	    sizeof(datalen)) != 0) e(0);
1147 
1148 	if ((buf2 = malloc(datalen)) == NULL) e(0);
1149 
1150 	memset(buf2, 'Y', datalen);
1151 	buf2[0] = 'X';
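	/*
	 * Put 'Z' at the last payload byte that will still be captured: the
	 * capture is truncated to 'size' bytes in total, and the BPF, IP, and
	 * UDP headers consume the leading part of that space.
	 */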
1152 	buf2[size - sizeof(struct udphdr) - sizeof(struct ip) -
1153 	    BPF_WORDALIGN(sizeof(bh)) - 1] = 'Z';
1154 
1155 	if (write(fd2, buf2, datalen) != datalen) e(0);
1156 
1157 	if (read(fd, buf, size) != size) e(0);
1158 
1159 	memcpy(&bh, buf, sizeof(bh));
1160 
1161 	if (bh.bh_hdrlen != BPF_WORDALIGN(sizeof(bh))) e(0);
1162 	if (bh.bh_caplen != size - BPF_WORDALIGN(sizeof(bh))) e(0);
1163 	if (bh.bh_datalen !=
1164 	    sizeof(struct ip) + sizeof(struct udphdr) + datalen) e(0);
1165 
1166 	if (buf[BPF_WORDALIGN(sizeof(bh)) + sizeof(struct ip) +
1167 	    sizeof(struct udphdr)] != 'X') e(0);
1168 	if (buf[size - 2] != 'Y') e(0);
1169 	if (buf[size - 1] != 'Z') e(0);
1170 
1171 	/*
1172 	 * Add a smaller packet in between, to ensure that 1) the large packet
1173 	 * is not split across buffers, and 2) the packet is truncated to the
1174 	 * size of the buffer, not the available part of the buffer.  Note how
1175 	 * forced rotation and our exact-fill policy preclude us from having to
1176 	 * use immediate mode for any of this.
1177 	 */
1178 	test94_add_random(fd2, buf, size, 1 /*seq*/);
1179 
1180 	if (write(fd2, buf2, datalen) != datalen) e(0);
1181 
1182 	len = read(fd, buf, size);
1183 	if (len <= 0) e(0);
1184 	if (len >= size * 3/4) e(0);	/* one packet < 3/4 of the size */
1185 	if (test94_check(buf, len, 1 /*seq*/, 1 /*filtered*/, NULL /*caplen*/,
1186 	    NULL /*datalen*/) != 2) e(0);
1187 
1188 	if (read(fd, buf, size) != size) e(0);
1189 
1190 	memcpy(&bh, buf, sizeof(bh));
1191 
1192 	if (bh.bh_hdrlen != BPF_WORDALIGN(sizeof(bh))) e(0);
1193 	if (bh.bh_caplen != size - BPF_WORDALIGN(sizeof(bh))) e(0);
1194 	if (bh.bh_datalen !=
1195 	    sizeof(struct ip) + sizeof(struct udphdr) + datalen) e(0);
1196 
1197 	if (buf[BPF_WORDALIGN(sizeof(bh)) + sizeof(struct ip) +
1198 	    sizeof(struct udphdr)] != 'X') e(0);
1199 	if (buf[size - 2] != 'Y') e(0);
1200 	if (buf[size - 1] != 'Z') e(0);
1201 
1202 	free(buf2);
1203 
1204 	test94_cleanup(fd, fd2, fd3, buf);
1205 }
1206 
1207 /*
1208  * Test whether our filter is active through two-way communication and a
1209  * subsequent check on the BPF statistics.  We do not actually look through the
1210  * captured packets, because who knows what else is active on the loopback
1211  * device (e.g., X11) and the extra code specifically to extract our packets in
1212  * the other direction is simply not worth it.
1213  */
1214 static void
1215 test94_comm(int fd, int fd2, int fd3, int filtered)
1216 {
1217 	struct bpf_stat bs;
1218 	char c;
1219 
1220 	if (write(fd2, "A", 1) != 1) e(0);
1221 
1222 	if (read(fd3, &c, 1) != 1) e(0);
1223 	if (c != 'A') e(0);
1224 
1225 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1226 	if (bs.bs_recv == 0) e(0);
1227 	if (bs.bs_capt == 0) e(0);
1228 
1229 	if (ioctl(fd, BIOCFLUSH) != 0) e(0);
1230 
1231 	if (write(fd3, "B", 1) != 1) e(0);
1232 
1233 	if (read(fd2, &c, 1) != 1) e(0);
1234 	if (c != 'B') e(0);
1235 
1236 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1237 	if (bs.bs_recv == 0) e(0);
1238 
1239 	if (filtered) {
1240 		if (bs.bs_capt != 0) e(0);
1241 		if (bs.bs_drop != 0) e(0);
1242 	} else
1243 		if (bs.bs_capt == 0) e(0);
1244 
1245 	if (ioctl(fd, BIOCFLUSH) != 0) e(0);
1246 }
1247 
1248 /*
1249  * Test filter installation and mechanics.
1250  */
1251 static void
1252 test94e(void)
1253 {
1254 	struct bpf_program bf;
1255 	struct bpf_stat bs;
1256 	struct bpf_hdr bh;
1257 	uint8_t *buf;
1258 	size_t size, len, plen, alen, off;
1259 	uint32_t seq, caplen[4], datalen[4];
1260 	int i, fd, fd2, fd3, val;
1261 
1262 	subtest = 5;
1263 
1264 	/*
1265 	 * We have already tested installing a filter both before and after
1266 	 * attaching to an interface by now, so we do not repeat that here.
1267 	 */
1268 	size = test94_setup(&fd, &fd2, &fd3, &buf, 0 /*size*/,
1269 	    0 /*set_filter*/);
1270 
1271 	val = 1;
1272 	if (ioctl(fd, BIOCIMMEDIATE, &val) != 0) e(0);
1273 
1274 	/*
1275 	 * A filter that is too large is rejected.  Unfortunately, due to
1276 	 * necessary IOCTL rewriting, this tests libc, not the service.
1277 	 */
1278 	memset(&bf, 0, sizeof(bf));
1279 	bf.bf_len = BPF_MAXINSNS + 1;
1280 	bf.bf_insns = NULL;
1281 	if (ioctl(fd, BIOCSETF, &bf) != -1) e(0);
1282 	if (errno != EINVAL) e(0);
1283 
1284 	/*
1285 	 * An invalid filter is rejected.  In this test case, the truncated
1286 	 * filter has a jump target beyond the end of the filter program.
1287 	 */
1288 	memset(&bf, 0, sizeof(bf));
1289 	bf.bf_len = __arraycount(test94_filter) - 1;
1290 	bf.bf_insns = test94_filter;
1291 	if (ioctl(fd, BIOCSETF, &bf) != -1) e(0);
1292 	if (errno != EINVAL) e(0);
1293 
1294 	test94_comm(fd, fd2, fd3, 0 /*filtered*/);
1295 
1296 	bf.bf_len++;
1297 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
1298 
1299 	test94_comm(fd, fd2, fd3, 1 /*filtered*/);
1300 
1301 	/*
1302 	 * Installing a zero-length filter clears the current filter, if any.
1303 	 */
1304 	memset(&bf, 0, sizeof(bf));
1305 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
1306 
1307 	test94_comm(fd, fd2, fd3, 0 /*filtered*/);
1308 
1309 	/* Test this twice to trip over unconditional filter deallocation. */
1310 	memset(&bf, 0, sizeof(bf));
1311 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
1312 
1313 	test94_comm(fd, fd2, fd3, 0 /*filtered*/);
1314 
1315 	/*
1316 	 * Test both aligned and unaligned capture sizes.  For each, test
1317 	 * sizes larger than, equal to, and smaller than the capture size.
1318 	 * In both cases, aggregate the packets into a single buffer and only
1319 	 * then go through them, to see whether alignment was done correctly.
1320 	 * We cannot do everything in one go as BIOCSETF implies a BIOCFLUSH.
1321 	 */
1322 	plen = sizeof(struct ip) + sizeof(struct udphdr) + sizeof(seq);
1323 	if (BPF_WORDALIGN(plen) != plen) e(0);
1324 	alen = BPF_WORDALIGN(plen + 1);
1325 	if (alen - 2 <= plen + 1) e(0);
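	/*
	 * For illustration: with a 20-byte IP header, an 8-byte UDP header,
	 * and a 4-byte sequence number, 'plen' is 32, which is already
	 * aligned; 'alen' is then the next alignment boundary above 32, the
	 * exact value depending on BPF_ALIGNMENT.
	 */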
1326 
1327 	/* First the aligned cases. */
1328 	test94_filter[__arraycount(test94_filter) - 1].k = alen;
1329 
1330 	memset(&bf, 0, sizeof(bf));
1331 	bf.bf_len = __arraycount(test94_filter);
1332 	bf.bf_insns = test94_filter;
1333 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
1334 
1335 	test94_comm(fd, fd2, fd3, 1 /*filtered*/);
1336 
1337 	test94_add_specific(fd2, buf, alen + 1 - plen, 1);
1338 	caplen[0] = alen;
1339 	datalen[0] = alen + 1;
1340 
1341 	test94_add_specific(fd2, buf, alen - plen, 2);
1342 	caplen[1] = alen;
1343 	datalen[1] = alen;
1344 
1345 	test94_add_specific(fd2, buf, alen + 3 - plen, 3);
1346 	caplen[2] = alen;
1347 	datalen[2] = alen + 3;
1348 
1349 	test94_add_specific(fd2, buf, alen - 1 - plen, 4);
1350 	caplen[3] = alen - 1;
1351 	datalen[3] = alen - 1;
1352 
1353 	memset(buf, 0, size);
1354 
1355 	len = read(fd, buf, size);
1356 
1357 	if (test94_check(buf, len, 1 /*seq*/, 1 /*filtered*/, caplen,
1358 	    datalen) != 5) e(0);
1359 
1360 	/* Then the unaligned cases. */
1361 	test94_filter[__arraycount(test94_filter) - 1].k = alen + 1;
1362 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
1363 
1364 	test94_add_specific(fd2, buf, alen + 2 - plen, 5);
1365 	caplen[0] = alen + 1;
1366 	datalen[0] = alen + 2;
1367 
1368 	test94_add_specific(fd2, buf, alen + 1 - plen, 6);
1369 	caplen[1] = alen + 1;
1370 	datalen[1] = alen + 1;
1371 
1372 	test94_add_specific(fd2, buf, alen + 9 - plen, 7);
1373 	caplen[2] = alen + 1;
1374 	datalen[2] = alen + 9;
1375 
1376 	test94_add_specific(fd2, buf, alen - plen, 8);
1377 	caplen[3] = alen;
1378 	datalen[3] = alen;
1379 
1380 	memset(buf, 0, size);
1381 
1382 	len = read(fd, buf, size);
1383 
1384 	if (test94_check(buf, len, 5 /*seq*/, 1 /*filtered*/, caplen,
1385 	    datalen) != 9) e(0);
1386 
1387 	/*
1388 	 * Check that capturing only one byte from packets is possible.  Not
1389 	 * that that would be particularly useful.
1390 	 */
1391 	test94_filter[__arraycount(test94_filter) - 1].k = 1;
1392 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
1393 
1394 	test94_add_random(fd2, buf, size, 9);
1395 	test94_add_random(fd2, buf, size, 10);
1396 	test94_add_random(fd2, buf, size, 11);
1397 
1398 	memset(buf, 0, size);
1399 
1400 	len = read(fd, buf, size);
1401 	if (len <= 0) e(0);
1402 
1403 	off = 0;
1404 	for (i = 0; i < 3; i++) {
1405 		if (len - off < sizeof(bh)) e(0);
1406 		memcpy(&bh, &buf[off], sizeof(bh));
1407 
1408 		if (bh.bh_tstamp.tv_sec == 0 && bh.bh_tstamp.tv_usec == 0)
1409 			e(0);
1410 		if (bh.bh_caplen != 1) e(0);
1411 		if (bh.bh_datalen < plen) e(0);
1412 		if (bh.bh_hdrlen != BPF_WORDALIGN(sizeof(bh))) e(0);
1413 
1414 		off += bh.bh_hdrlen;
1415 
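		/*
		 * The single captured byte is the first byte of the IPv4
		 * header: version 4, header length 5 words, i.e., 0x45.
		 */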
1416 		if (buf[off] != 0x45) e(0);
1417 
1418 		off += BPF_WORDALIGN(bh.bh_caplen);
1419 	}
1420 	if (off != len) e(0);
1421 
1422 	/*
1423 	 * Finally, a zero capture size should result in rejected packets only.
1424 	 */
1425 	test94_filter[__arraycount(test94_filter) - 1].k = 0;
1426 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
1427 
1428 	test94_add_random(fd2, buf, size, 12);
1429 	test94_add_random(fd2, buf, size, 13);
1430 	test94_add_random(fd2, buf, size, 14);
1431 
1432 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1433 	if (bs.bs_recv < 3) e(0);
1434 	if (bs.bs_capt != 0) e(0);
1435 	if (bs.bs_drop != 0) e(0);
1436 
1437 	/* Restore the capture limit of the filter to its original state. */
1438 	test94_filter[__arraycount(test94_filter) - 1].k = (uint32_t)-1;
1439 
1440 	test94_cleanup(fd, fd2, fd3, buf);
1441 }
1442 
1443 /*
1444  * Compute an IP checksum.
1445  */
1446 static uint16_t
1447 test94_cksum(uint8_t * buf, size_t len)
1448 {
1449 	uint32_t sum, word;
1450 
1451 	/* This is a really dumb implementation but *shrug*. */
1452 	for (sum = 0; len > 0; sum += word) {
1453 		if (len > 1) {
1454 			word = buf[0] << 8 | buf[1];
1455 			buf += 2;
1456 			len -= 2;
1457 		} else {
1458 			word = buf[0] << 8;
1459 			len--;
1460 		}
1461 	}
1462 
1463 	while (sum > UINT16_MAX)
1464 		sum = (sum & UINT16_MAX) + (sum >> 16);
1465 
1466 	return ~(uint16_t)sum;
1467 }
1468 
1469 /*
1470  * Set up UDP headers for a packet.  The packet uses IPv4 unless 'v6' is set,
1471  * in which case IPv6 is used.  The given buffer must be large enough to
1472  * contain the headers and the (to be appended) data.  The function returns the
1473  * offset into the buffer to the data portion of the packet.
1474  */
1475 static size_t
1476 test94_make_pkt(uint8_t * buf, size_t len, int v6)
1477 {
1478 	struct ip ip;
1479 	struct ip6_hdr ip6;
1480 	struct udphdr uh;
1481 	size_t off;
1482 
1483 	if (!v6) {
1484 		memset(&ip, 0, sizeof(ip));
1485 		ip.ip_v = IPVERSION;
1486 		ip.ip_hl = sizeof(ip) >> 2;
1487 		ip.ip_len = htons(sizeof(ip) + sizeof(uh) + len);
1488 		ip.ip_ttl = 255;
1489 		ip.ip_p = IPPROTO_UDP;
1490 		ip.ip_sum = 0;
1491 		ip.ip_src.s_addr = htonl(INADDR_LOOPBACK);
1492 		ip.ip_dst.s_addr = htonl(INADDR_LOOPBACK);
1493 
1494 		memcpy(buf, &ip, sizeof(ip));
1495 		ip.ip_sum = htons(test94_cksum(buf, sizeof(ip)));
1496 		memcpy(buf, &ip, sizeof(ip));
1497 		if (test94_cksum(buf, sizeof(ip)) != 0) e(0);
1498 
1499 		off = sizeof(ip);
1500 	} else {
1501 		memset(&ip6, 0, sizeof(ip6));
1502 		ip6.ip6_vfc = IPV6_VERSION;
1503 		ip6.ip6_plen = htons(sizeof(uh) + len);
1504 		ip6.ip6_nxt = IPPROTO_UDP;
1505 		ip6.ip6_hlim = 255;
1506 		memcpy(&ip6.ip6_src, &in6addr_loopback, sizeof(ip6.ip6_src));
1507 		memcpy(&ip6.ip6_dst, &in6addr_loopback, sizeof(ip6.ip6_dst));
1508 
1509 		memcpy(buf, &ip6, sizeof(ip6));
1510 
1511 		off = sizeof(ip6);
1512 	}
1513 
1514 	memset(&uh, 0, sizeof(uh));
1515 	uh.uh_sport = htons(TEST_PORT_A);
1516 	uh.uh_dport = htons(TEST_PORT_B);
1517 	uh.uh_ulen = htons(sizeof(uh) + len);
1518 	uh.uh_sum = 0; /* lazy but we also don't have the data yet */
1519 
1520 	memcpy(buf + off, &uh, sizeof(uh));
1521 
1522 	return off + sizeof(uh);
1523 }
1524 
1525 /*
1526  * Test sending packets by writing to a BPF device.
1527  */
1528 static void
1529 test94f(void)
1530 {
1531 	struct bpf_stat bs;
1532 	struct ifreq ifr;
1533 	fd_set fds;
1534 	uint8_t *buf;
1535 	size_t off;
1536 	unsigned int i, uval, mtu;
1537 	int fd, fd2, fd3;
1538 
1539 	subtest = 6;
1540 
1541 	(void)test94_setup(&fd, &fd2, &fd3, &buf, 0 /*size*/,
1542 	    1 /*set_filter*/);
1543 
1544 	/*
1545 	 * Select queries should always indicate that the device is writable.
1546 	 */
1547 	FD_ZERO(&fds);
1548 	FD_SET(fd, &fds);
1549 	if (select(fd + 1, NULL, &fds, NULL, NULL) != 1) e(0);
1550 	if (!FD_ISSET(fd, &fds)) e(0);
1551 
1552 	/*
1553 	 * Test packet size limits.  For loopback devices, the data link
1554 	 * layer maximum transmission unit (MTU) should be 65535-4 = 65531
1555 	 * bytes.  Obtain the actual value anyway; it might have changed.
1556 	 */
1557 	memset(&ifr, 0, sizeof(ifr));
1558 	strlcpy(ifr.ifr_name, LOOPBACK_IFNAME, sizeof(ifr.ifr_name));
1559 
1560 	if (ioctl(fd2, SIOCGIFMTU, &ifr) != 0) e(0);
1561 	mtu = ifr.ifr_mtu;
1562 
1563 	if ((buf = realloc(buf, UINT16_MAX + 1)) == NULL) e(0);
1564 
1565 	memset(buf, 0, UINT16_MAX + 1);
1566 
1567 	for (i = UINT16_MAX + 1; i > mtu; i--) {
1568 		if (write(fd, buf, i) != -1) e(0);
1569 		if (errno != EMSGSIZE) e(0);
1570 	}
1571 
1572 	/* This packet will be discarded as completely crap.  That's fine. */
1573 	if (write(fd, buf, mtu) != mtu) e(0);
1574 
1575 	/*
1576 	 * Zero-sized writes are accepted but do not do anything.
1577 	 */
1578 	if (write(fd, buf, 0) != 0) e(0);
1579 
1580 	/*
1581 	 * Send an actual packet, and see if it arrives.
1582 	 */
1583 	off = test94_make_pkt(buf, 6, 0 /*v6*/);
1584 	memcpy(buf + off, "Hello!", 6);
1585 
1586 	if (write(fd, buf, off + 6) != off + 6) e(0);
1587 
1588 	memset(buf, 0, mtu);
1589 	if (read(fd3, buf, mtu) != 6) e(0);
1590 	if (memcmp(buf, "Hello!", 6) != 0) e(0);
1591 
1592 	/*
1593 	 * Enable feedback mode to test that the packet now arrives twice.
1594 	 * Send a somewhat larger packet to test that data copy-in handles
1595 	 * offsets correctly.
1596 	 */
1597 	uval = 1;
1598 	if (ioctl(fd, BIOCSFEEDBACK, &uval) != 0) e(0);
1599 
1600 	off = test94_make_pkt(buf, 12345, 0 /*v6*/);
1601 	for (i = 0; i < 12345; i++)
1602 		buf[off + i] = 1 + (i % 251); /* the largest prime < 255 */
1603 
1604 	if (write(fd, buf, off + 12345) != off + 12345) e(0);
1605 
1606 	/* We need a default UDP SO_RCVBUF >= 12345 * 2 for this. */
1607 	memset(buf, 0, UINT16_MAX);
1608 	if (recv(fd3, buf, UINT16_MAX, 0) != 12345) e(0);
1609 	for (i = 0; i < 12345; i++)
1610 		if (buf[i] != 1 + (i % 251)) e(0);
1611 
1612 	memset(buf, 0, UINT16_MAX);
1613 	if (recv(fd3, buf, UINT16_MAX, MSG_DONTWAIT) != 12345) e(0);
1614 	for (i = 0; i < 12345; i++)
1615 		if (buf[i] != 1 + (i % 251)) e(0);
1616 
1617 	if (recv(fd3, buf, UINT16_MAX, MSG_DONTWAIT) != -1) e(0);
1618 	if (errno != EWOULDBLOCK) e(0);
1619 
1620 	/*
1621 	 * The two valid packets we sent will have been captured by our BPF
1622 	 * device as well, because SEESENT is enabled by default and also
1623 	 * applies to packets written to a BPF device.  The reason for that is
1624 	 * that it allows tcpdump(8) to see what DHCP clients are sending, for
1625 	 * example.  The packets we sent are accepted by the installed filter.
1626 	 */
1627 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
1628 	if (bs.bs_capt != 2) e(0);
1629 
1630 	/* Now that we've written data, test select once more. */
1631 	FD_ZERO(&fds);
1632 	FD_SET(fd, &fds);
1633 	if (select(fd + 1, NULL, &fds, NULL, NULL) != 1) e(0);
1634 	if (!FD_ISSET(fd, &fds)) e(0);
1635 
1636 	test94_cleanup(fd, fd2, fd3, buf);
1637 }
1638 
1639 /*
1640  * Test read, write, and select operations on unconfigured devices.
1641  */
1642 static void
1643 test94g(void)
1644 {
1645 	fd_set rfds, wfds;
1646 	uint8_t *buf;
1647 	unsigned int size;
1648 	int fd;
1649 
1650 	subtest = 7;
1651 
1652 	if ((fd = open(_PATH_BPF, O_RDWR)) < 0) e(0);
1653 
1654 	if (ioctl(fd, BIOCGBLEN, &size) != 0) e(0);
1655 	if (size < 1024 || size > BPF_MAXBUFSIZE) e(0);
1656 
1657 	if ((buf = malloc(size)) == NULL) e(0);
1658 
1659 	if (read(fd, buf, size) != -1) e(0);
1660 	if (errno != EINVAL) e(0);
1661 
1662 	if (write(fd, buf, size) != -1) e(0);
1663 	if (errno != EINVAL) e(0);
1664 
1665 	FD_ZERO(&rfds);
1666 	FD_SET(fd, &rfds);
1667 	FD_ZERO(&wfds);
1668 	FD_SET(fd, &wfds);
1669 
1670 	if (select(fd + 1, &rfds, &wfds, NULL, NULL) != 2) e(0);
1671 
1672 	if (!FD_ISSET(fd, &rfds)) e(0);
1673 	if (!FD_ISSET(fd, &wfds)) e(0);
1674 
1675 	free(buf);
1676 
1677 	if (close(fd) != 0) e(0);
1678 }
1679 
1680 /*
1681  * Test various IOCTL calls.  Several of these tests are rather superficial,
1682  * because we would need a real interface, rather than the loopback device, to
1683  * test their functionality properly.  Also note that we skip various checks
1684  * performed as part of the earlier subtests.
1685  */
1686 static void
1687 test94h(void)
1688 {
1689 	struct bpf_stat bs;
1690 	struct bpf_version bv;
1691 	struct bpf_dltlist bfl;
1692 	struct ifreq ifr;
1693 	struct timeval tv;
1694 	uint8_t *buf;
1695 	size_t size;
1696 	unsigned int uval, list[2];
1697 	int cfd, ufd, fd2, fd3, val;
1698 
1699 	subtest = 8;
1700 
1701 	/*
1702 	 * Many IOCTLs work only on configured or only on unconfigured BPF
1703 	 * devices, so for convenience we create a file descriptor for each.
1704 	 */
1705 	size = test94_setup(&cfd, &fd2, &fd3, &buf, 0 /*size*/,
1706 	    1 /*set_filter*/);
1707 
1708 	if ((ufd = open(_PATH_BPF, O_RDWR)) < 0) e(0);
1709 
1710 	/*
1711 	 * The BIOCSBLEN value is silently corrected to fall within a valid
1712 	 * range, and BIOCGBLEN can be used to obtain the corrected value.  We
1713 	 * do not know the valid range, so we use fairly extreme test values.
1714 	 */
1715 	uval = 1;
1716 	if (ioctl(ufd, BIOCSBLEN, &uval) != 0) e(0);
1717 
1718 	if (ioctl(ufd, BIOCGBLEN, &uval) != 0) e(0);
1719 	if (uval < sizeof(struct bpf_hdr) || uval > BPF_MAXBUFSIZE) e(0);
1720 
1721 	uval = (unsigned int)-1;
1722 	if (ioctl(ufd, BIOCSBLEN, &uval) != 0) e(0);
1723 
1724 	if (ioctl(ufd, BIOCGBLEN, &uval) != 0) e(0);
1725 	if (uval < sizeof(struct bpf_hdr) || uval > BPF_MAXBUFSIZE) e(0);
1726 
1727 	uval = 0;
1728 	if (ioctl(ufd, BIOCSBLEN, &uval) != 0) e(0);
1729 
1730 	if (ioctl(ufd, BIOCGBLEN, &uval) != 0) e(0);
1731 	if (uval < sizeof(struct bpf_hdr) || uval > BPF_MAXBUFSIZE) e(0);
1732 
1733 	uval = 1024; /* a value that should be acceptable but small */
1734 	if (ioctl(ufd, BIOCSBLEN, &uval) != 0) e(0);
1735 	if (ioctl(ufd, BIOCGBLEN, &uval) != 0) e(0);
1736 	if (uval != 1024) e(0);
1737 
1738 	/*
1739 	 * For configured devices, it is not possible to adjust the buffer
1740 	 * size, but it is still possible to obtain it.
1741 	 */
1742 	if (ioctl(cfd, BIOCSBLEN, &uval) != -1) e(0);
1743 	if (errno != EINVAL) e(0);
1744 
1745 	if (ioctl(cfd, BIOCGBLEN, &uval) != 0) e(0);
1746 	if (uval != size) e(0);
1747 
1748 	/*
1749 	 * BIOCFLUSH resets both buffer contents and statistics.
1750 	 */
1751 	uval = 1;
1752 	if (ioctl(cfd, BIOCIMMEDIATE, &uval) != 0) e(0);
1753 
1754 	test94_fill_exact(fd2, buf, size, 1 /*seq*/);
1755 	test94_fill_exact(fd2, buf, size, 1 /*seq*/);
1756 	test94_fill_exact(fd2, buf, size, 1 /*seq*/);
1757 
1758 	if (ioctl(cfd, BIOCGSTATS, &bs) != 0) e(0);
1759 	if (bs.bs_recv == 0) e(0);
1760 	if (bs.bs_drop == 0) e(0);
1761 	if (bs.bs_capt == 0) e(0);
1762 
1763 	/* Do make sure that statistics are not cleared on retrieval. */
1764 	if (ioctl(cfd, BIOCGSTATS, &bs) != 0) e(0);
1765 	if (bs.bs_recv == 0) e(0);
1766 	if (bs.bs_drop == 0) e(0);
1767 	if (bs.bs_capt == 0) e(0);
1768 
1769 	if (ioctl(cfd, FIONREAD, &val) != 0) e(0);
1770 	if (val == 0) e(0);
1771 
1772 	if (ioctl(cfd, BIOCFLUSH) != 0) e(0);
1773 
1774 	/* There is a race condition for bs_recv here, so we cannot test it. */
1775 	if (ioctl(cfd, BIOCGSTATS, &bs) != 0) e(0);
1776 	if (bs.bs_drop != 0) e(0);
1777 	if (bs.bs_capt != 0) e(0);
1778 
1779 	if (ioctl(cfd, FIONREAD, &val) != 0) e(0);
1780 	if (val != 0) e(0);
1781 
1782 	/*
1783 	 * Although practically useless, BIOCFLUSH works on unconfigured
1784 	 * devices.  So does BIOCGSTATS.
1785 	 */
1786 	if (ioctl(ufd, BIOCFLUSH) != 0) e(0);
1787 
1788 	if (ioctl(ufd, BIOCGSTATS, &bs) != 0) e(0);
1789 	if (bs.bs_recv != 0) e(0);
1790 	if (bs.bs_drop != 0) e(0);
1791 	if (bs.bs_capt != 0) e(0);
1792 
1793 	/*
1794 	 * BIOCPROMISC works on configured devices only.  On loopback devices
1795 	 * it has no observable effect though.
1796 	 */
1797 	if (ioctl(ufd, BIOCPROMISC) != -1) e(0);
1798 	if (errno != EINVAL) e(0);
1799 
1800 	if (ioctl(cfd, BIOCPROMISC) != 0) e(0);
1801 
1802 	/*
1803 	 * BIOCGDLT does not work on unconfigured devices.
1804 	 */
1805 	if (ioctl(ufd, BIOCGDLT, &uval) != -1) e(0);
1806 	if (errno != EINVAL) e(0);
1807 
1808 	/*
1809 	 * BIOCGETIF works only on configured devices, where it returns the
1810 	 * associated device name.
1811 	 */
1812 	if (ioctl(ufd, BIOCGETIF, &ifr) != -1) e(0);
1813 	if (errno != EINVAL) e(0);
1814 
1815 	memset(&ifr, 'X', sizeof(ifr));
1816 	if (ioctl(cfd, BIOCGETIF, &ifr) != 0) e(0);
1817 	if (strcmp(ifr.ifr_name, LOOPBACK_IFNAME) != 0) e(0);
1818 
1819 	/*
1820 	 * BIOCSETIF works only on unconfigured devices, and accepts only
1821 	 * valid interface names.  The name is forced to be null terminated.
1822 	 */
1823 	memset(&ifr, 0, sizeof(ifr));
1824 	strlcpy(ifr.ifr_name, LOOPBACK_IFNAME, sizeof(ifr.ifr_name));
1825 	if (ioctl(cfd, BIOCSETIF, &ifr) != -1) e(0);
1826 	if (errno != EINVAL) e(0);
1827 
1828 	memset(&ifr, 0, sizeof(ifr));
1829 	memset(ifr.ifr_name, 'x', sizeof(ifr.ifr_name));
1830 	if (ioctl(ufd, BIOCSETIF, &ifr) != -1) e(0);
1831 	if (errno != ENXIO) e(0);
1832 
1833 	/* Anyone that has ten loopback devices is simply insane. */
1834 	memset(&ifr, 0, sizeof(ifr));
1835 	strlcpy(ifr.ifr_name, LOOPBACK_IFNAME, sizeof(ifr.ifr_name));
1836 	ifr.ifr_name[strlen(ifr.ifr_name) - 1] += 9;
1837 	if (ioctl(ufd, BIOCSETIF, &ifr) != -1) e(0);
1838 	if (errno != ENXIO) e(0);
1839 
1840 	/*
1841 	 * It is possible to turn BIOCIMMEDIATE on and off.  We already enabled
1842 	 * it a bit higher up.  Note that our implementation does not support
1843 	 * toggling the setting while a read call is in progress, and toggling
1844 	 * the setting will have no effect while a select call is in progress;
1845 	 * similar restrictions apply to effectively all relevant settings.
1846 	 * Either way, we do not test those cases here.
1847 	 */
1848 	test94_add_random(fd2, buf, size, 1 /*seq*/);
1849 
1850 	if (ioctl(cfd, FIONREAD, &val) != 0) e(0);
1851 	if (val == 0) e(0);
1852 
1853 	uval = 0;
1854 	if (ioctl(cfd, BIOCIMMEDIATE, &uval) != 0) e(0);
1855 
1856 	if (ioctl(cfd, FIONREAD, &val) != 0) e(0);
1857 	if (val != 0) e(0);
1858 
1859 	uval = 1;
1860 	if (ioctl(cfd, BIOCIMMEDIATE, &uval) != 0) e(0);
1861 
1862 	if (ioctl(cfd, FIONREAD, &val) != 0) e(0);
1863 	if (val == 0) e(0);
1864 
1865 	if (ioctl(cfd, BIOCFLUSH) != 0) e(0);
1866 
1867 	/*
1868 	 * BIOCIMMEDIATE also works on unconfigured devices.
1869 	 */
1870 	uval = 1;
1871 	if (ioctl(ufd, BIOCIMMEDIATE, &uval) != 0) e(0);
1872 
1873 	uval = 0;
1874 	if (ioctl(ufd, BIOCIMMEDIATE, &uval) != 0) e(0);
1875 
1876 	/*
1877 	 * BIOCVERSION should return the current BPF interface version.
1878 	 */
1879 	if (ioctl(ufd, BIOCVERSION, &bv) != 0) e(0);
1880 	if (bv.bv_major != BPF_MAJOR_VERSION) e(0);
1881 	if (bv.bv_minor != BPF_MINOR_VERSION) e(0);
1882 
1883 	/*
1884 	 * BIOCSHDRCMPLT makes sense only for devices with data link headers,
1885 	 * which rules out loopback devices.  Check the default and test
1886 	 * toggling it, and stop there.
1887 	 */
1888 	/* The default value is off. */
1889 	uval = 1;
1890 	if (ioctl(ufd, BIOCGHDRCMPLT, &uval) != 0) e(0);
1891 	if (uval != 0) e(0);
1892 
1893 	uval = 2;
1894 	if (ioctl(ufd, BIOCSHDRCMPLT, &uval) != 0) e(0);
1895 
1896 	if (ioctl(ufd, BIOCGHDRCMPLT, &uval) != 0) e(0);
1897 	if (uval != 1) e(0);
1898 
1899 	uval = 0;
1900 	if (ioctl(ufd, BIOCSHDRCMPLT, &uval) != 0) e(0);
1901 
1902 	uval = 1;
1903 	if (ioctl(ufd, BIOCGHDRCMPLT, &uval) != 0) e(0);
1904 	if (uval != 0) e(0);
1905 
1906 	/*
1907 	 * BIOCSDLT works on configured devices only.  For loopback devices, it
1908 	 * can only set the data link type to its current value, which on MINIX3
1909 	 * for loopback devices is DLT_RAW (i.e., no headers at all).
1910 	 */
1911 	uval = DLT_RAW;
1912 	if (ioctl(ufd, BIOCSDLT, &uval) != -1) e(0);
1913 	if (errno != EINVAL) e(0);
1914 
1915 	uval = DLT_RAW;
1916 	if (ioctl(cfd, BIOCSDLT, &uval) != 0) e(0);
1917 
1918 	uval = DLT_NULL;
1919 	if (ioctl(cfd, BIOCSDLT, &uval) != -1) e(0);
1920 	if (errno != EINVAL) e(0);
1921 
1922 	if (ioctl(cfd, BIOCGDLT, &uval) != 0) e(0);
1923 	if (uval != DLT_RAW) e(0);
1924 
1925 	/*
1926 	 * BIOCGDLTLIST works on configured devices only, and may be used to
1927 	 * both query the size of the list and obtain the list.  On MINIX3,
1928 	 * loopback devices will only ever return DLT_RAW.  Unfortunately,
1929 	 * much of the handling for this IOCTL is in libc for us, which is also
1930 	 * why we do not test bad pointers and stuff like that.
1931 	 */
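	/*
	 * The calls below follow the usual two-step pattern: with bfl_list
	 * set to NULL, only the number of link types is returned in bfl_len;
	 * with bfl_list pointing to a sufficiently large array, the list
	 * itself is retrieved as well.
	 */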
1932 	memset(&bfl, 0, sizeof(bfl));
1933 	if (ioctl(ufd, BIOCGDLTLIST, &bfl) != -1) e(0);
1934 	if (errno != EINVAL) e(0);
1935 
1936 	memset(&bfl, 0, sizeof(bfl));
1937 	if (ioctl(cfd, BIOCGDLTLIST, &bfl) != 0) e(0);
1938 	if (bfl.bfl_len != 1) e(0);
1939 	if (bfl.bfl_list != NULL) e(0);
1940 
1941 	memset(&bfl, 0, sizeof(bfl));
1942 	bfl.bfl_len = 2;	/* should be ignored */
1943 	if (ioctl(cfd, BIOCGDLTLIST, &bfl) != 0) e(0);
1944 	if (bfl.bfl_len != 1) e(0);
1945 	if (bfl.bfl_list != NULL) e(0);
1946 
1947 	memset(&bfl, 0, sizeof(bfl));
1948 	memset(list, 0, sizeof(list));
1949 	bfl.bfl_list = list;
1950 	if (ioctl(cfd, BIOCGDLTLIST, &bfl) != -1) e(0);
1951 	if (errno != ENOMEM) e(0);
1952 	if (list[0] != 0) e(0);
1953 
1954 	memset(&bfl, 0, sizeof(bfl));
1955 	bfl.bfl_len = 1;
1956 	bfl.bfl_list = list;
1957 	if (ioctl(cfd, BIOCGDLTLIST, &bfl) != 0) e(0);
1958 	if (bfl.bfl_len != 1) e(0);
1959 	if (bfl.bfl_list != list) e(0);
1960 	if (list[0] != DLT_RAW) e(0);
1961 	if (list[1] != 0) e(0);
1962 
1963 	memset(&bfl, 0, sizeof(bfl));
1964 	memset(list, 0, sizeof(list));
1965 	bfl.bfl_len = 2;
1966 	bfl.bfl_list = list;
1967 	if (ioctl(cfd, BIOCGDLTLIST, &bfl) != 0) e(0);
1968 	if (bfl.bfl_len != 1) e(0);
1969 	if (bfl.bfl_list != list) e(0);
1970 	if (list[0] != DLT_RAW) e(0);
1971 	if (list[1] != 0) e(0);
1972 
1973 	/*
1974 	 * For loopback devices, BIOCSSEESENT is a bit weird: packets are
1975 	 * captured on output to get a complete view of loopback traffic, and
1976 	 * not also on input because that would then duplicate the traffic.  As
1977 	 * a result, turning off BIOCSSEESENT for a loopback device means that
1978 	 * no packets will be captured at all anymore.  First test the default
1979 	 * and toggling on the unconfigured device, then reproduce the above on
1980 	 * the configured device.
1981 	 */
1982 	/* The default value is on. */
1983 	uval = 0;
1984 	if (ioctl(ufd, BIOCGSEESENT, &uval) != 0) e(0);
1985 	if (uval != 1) e(0);
1986 
1987 	uval = 0;
1988 	if (ioctl(ufd, BIOCSSEESENT, &uval) != 0) e(0);
1989 
1990 	uval = 1;
1991 	if (ioctl(ufd, BIOCGSEESENT, &uval) != 0) e(0);
1992 	if (uval != 0) e(0);
1993 
1994 	uval = 2;
1995 	if (ioctl(ufd, BIOCSSEESENT, &uval) != 0) e(0);
1996 
1997 	if (ioctl(ufd, BIOCGSEESENT, &uval) != 0) e(0);
1998 	if (uval != 1) e(0);
1999 
2000 	if (ioctl(cfd, BIOCGSEESENT, &uval) != 0) e(0);
2001 	if (uval != 1) e(0);
2002 
2003 	uval = 0;
2004 	if (ioctl(cfd, BIOCSSEESENT, &uval) != 0) e(0);
2005 
2006 	if (ioctl(cfd, BIOCFLUSH) != 0) e(0);
2007 
2008 	test94_add_random(fd2, buf, size, 1 /*seq*/);
2009 
2010 	if (ioctl(cfd, BIOCGSTATS, &bs) != 0) e(0);
2011 	if (bs.bs_recv != 0) e(0);
2012 
2013 	uval = 1;
2014 	if (ioctl(cfd, BIOCSSEESENT, &uval) != 0) e(0);
2015 
2016 	if (ioctl(cfd, BIOCFLUSH) != 0) e(0);
2017 
2018 	test94_add_random(fd2, buf, size, 1 /*seq*/);
2019 
2020 	if (ioctl(cfd, BIOCGSTATS, &bs) != 0) e(0);
2021 	if (bs.bs_recv == 0) e(0);
2022 
2023 	/*
2024 	 * The BIOCSRTIMEOUT values are rounded up to clock granularity.
2025 	 * Invalid timeout values are rejected.
2026 	 */
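	/*
	 * For example, assuming a 100 Hz system clock (10 ms ticks), a
	 * timeout of 123 seconds and one microsecond would be rounded up to
	 * 123 seconds and ten milliseconds; the "rounding should be up" check
	 * below relies on exactly that behavior.
	 */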
2027 	/* The default value is zero. */
2028 	tv.tv_sec = 99;
2029 	if (ioctl(ufd, BIOCGRTIMEOUT, &tv) != 0) e(0);
2030 	if (tv.tv_sec != 0) e(0);
2031 	if (tv.tv_usec != 0) e(0);
2032 
2033 	tv.tv_usec = 1000000;
2034 	if (ioctl(ufd, BIOCSRTIMEOUT, &tv) != -1) e(0);
2035 	if (errno != EINVAL) e(0);
2036 
2037 	tv.tv_usec = -1;
2038 	if (ioctl(ufd, BIOCSRTIMEOUT, &tv) != -1) e(0);
2039 	if (errno != EINVAL) e(0);
2040 
2041 	tv.tv_sec = -1;
2042 	tv.tv_usec = 0;
2043 	if (ioctl(ufd, BIOCSRTIMEOUT, &tv) != -1) e(0);
2044 	if (errno != EINVAL) e(0);
2045 
2046 	tv.tv_sec = INT_MAX;
2047 	if (ioctl(ufd, BIOCSRTIMEOUT, &tv) != -1) e(0);
2048 	if (errno != EDOM) e(0);
2049 
2050 	if (ioctl(ufd, BIOCGRTIMEOUT, &tv) != 0) e(0);
2051 	if (tv.tv_sec != 0) e(0);
2052 	if (tv.tv_usec != 0) e(0);
2053 
2054 	tv.tv_sec = 123;
2055 	tv.tv_usec = 1;
2056 	if (ioctl(ufd, BIOCSRTIMEOUT, &tv) != 0) e(0);
2057 
2058 	if (ioctl(ufd, BIOCGRTIMEOUT, &tv) != 0) e(0);
2059 	if (tv.tv_sec != 123) e(0);
2060 	if (tv.tv_usec == 0) e(0); /* rounding should be up */
2061 
2062 	tv.tv_sec = 0;
2063 	tv.tv_usec = 0;
2064 	if (ioctl(ufd, BIOCSRTIMEOUT, &tv) != 0) e(0);
2065 
2066 	if (ioctl(ufd, BIOCGRTIMEOUT, &tv) != 0) e(0);
2067 	if (tv.tv_sec != 0) e(0);
2068 	if (tv.tv_usec != 0) e(0);
2069 
2070 	/*
2071 	 * BIOCSFEEDBACK is another weird setting, of which we test only the
2072 	 * default value and toggling here.
2073 	 */
2074 	/* The default value is off. */
2075 	uval = 1;
2076 	if (ioctl(ufd, BIOCGFEEDBACK, &uval) != 0) e(0);
2077 	if (uval != 0) e(0);
2078 
2079 	uval = 2;
2080 	if (ioctl(ufd, BIOCSFEEDBACK, &uval) != 0) e(0);
2081 
2082 	if (ioctl(ufd, BIOCGFEEDBACK, &uval) != 0) e(0);
2083 	if (uval != 1) e(0);
2084 
2085 	uval = 0;
2086 	if (ioctl(ufd, BIOCSFEEDBACK, &uval) != 0) e(0);
2087 
2088 	uval = 1;
2089 	if (ioctl(ufd, BIOCGFEEDBACK, &uval) != 0) e(0);
2090 	if (uval != 0) e(0);
2091 
2092 	/* Clean up. */
2093 	if (close(ufd) != 0) e(0);
2094 
2095 	test94_cleanup(cfd, fd2, fd3, buf);
2096 }
2097 
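/*
 * For reference only: a minimal sketch, not used by the test itself, of how
 * a typical capture client would combine the ioctls exercised above.  The
 * interface name parameter and the reduced error handling are illustrative
 * assumptions rather than anything prescribed by the BPF interface.
 */
static void __unused
example_bpf_reader(const char * ifname)
{
	struct bpf_hdr bh;
	struct ifreq ifr;
	uint8_t *buf, *p;
	unsigned int size, uval;
	ssize_t len;
	int fd;

	if ((fd = open(_PATH_BPF, O_RDWR)) < 0) return;

	/* Match the device's current buffer size. */
	if (ioctl(fd, BIOCGBLEN, &size) != 0 ||
	    (buf = malloc(size)) == NULL) {
		close(fd);
		return;
	}

	/* Attach to the interface and request immediate packet delivery. */
	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
	uval = 1;
	if (ioctl(fd, BIOCSETIF, &ifr) == 0 &&
	    ioctl(fd, BIOCIMMEDIATE, &uval) == 0 &&
	    (len = read(fd, buf, size)) > 0) {
		/* One read may yield many packets; walk the capture records. */
		for (p = buf; p < buf + len; ) {
			memcpy(&bh, p, sizeof(bh));
			/* Packet data: bh.bh_caplen bytes at p + bh.bh_hdrlen. */
			p += BPF_WORDALIGN(bh.bh_hdrlen + bh.bh_caplen);
		}
	}

	free(buf);
	close(fd);
}
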
2098 /* IPv6 version of our filter. */
2099 static struct bpf_insn test94_filter6[] = {
2100 	{ BPF_LD+BPF_B+BPF_ABS, 0, 0, 0 },	/* is this an IPv6 header? */
2101 	{ BPF_ALU+BPF_RSH+BPF_K, 0, 0, 4 },
2102 	{ BPF_JMP+BPF_JEQ+BPF_K, 0, 6, 6 },
2103 	{ BPF_LD+BPF_B+BPF_ABS, 0, 0, 6 },	/* is this a UDP packet? */
2104 	{ BPF_JMP+BPF_JEQ+BPF_K, 0, 4, IPPROTO_UDP },
2105 	{ BPF_LD+BPF_H+BPF_ABS, 0, 0, 40 },	/* source port 12345? */
2106 	{ BPF_JMP+BPF_JEQ+BPF_K, 0, 2, TEST_PORT_A },
2107 	{ BPF_LD+BPF_H+BPF_ABS, 0, 0, 42 },	/* destination port 12346? */
2108 	{ BPF_JMP+BPF_JEQ+BPF_K, 1, 0, TEST_PORT_B },
2109 	{ BPF_RET+BPF_K, 0, 0, 0 },		/* reject the packet */
2110 	{ BPF_RET+BPF_K, 0, 0, (uint32_t)-1 },	/* accept the (whole) packet */
2111 };
2112 
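/*
 * A note on the offsets in the filter above: with DLT_RAW, the captured
 * packet starts right at the IPv6 header, so the version nibble is in byte 0
 * and the next-header field is at offset 6.  The fixed IPv6 header is 40
 * bytes long, which puts the UDP source and destination ports at offsets 40
 * and 42.  Packets carrying extension headers would not match this filter,
 * but the test never generates such packets.
 */
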
2113 /*
2114  * Test receipt of IPv6 packets, because it was getting a bit messy to
2115  * integrate that into the previous subtests.  We just want to make sure that
2116  * IPv6 packets are properly filtered and captured at all.  The rest of the
2117  * code is entirely version agnostic anyway.
2118  */
2119 static void
2120 test94i(void)
2121 {
2122 	struct sockaddr_in6 sin6A, sin6B;
2123 	struct bpf_program bf;
2124 	struct bpf_stat bs;
2125 	struct bpf_hdr bh;
2126 	struct ifreq ifr;
2127 	struct ip6_hdr ip6;
2128 	struct udphdr uh;
2129 	uint8_t *buf, c;
2130 	socklen_t socklen;
2131 	ssize_t len;
2132 	size_t off;
2133 	unsigned int uval, size, dlt;
2134 	int fd, fd2, fd3;
2135 
2136 	subtest = 9;
2137 
2138 	if ((fd = open(_PATH_BPF, O_RDWR)) < 0) e(0);
2139 
2140 	if (ioctl(fd, BIOCGBLEN, &size) != 0) e(0);
2141 	if (size < 1024 || size > BPF_MAXBUFSIZE) e(0);
2142 
2143 	if ((buf = malloc(size)) == NULL) e(0);
2144 
2145 	/* Install the filter. */
2146 	memset(&bf, 0, sizeof(bf));
2147 	bf.bf_len = __arraycount(test94_filter6);
2148 	bf.bf_insns = test94_filter6;
2149 	if (ioctl(fd, BIOCSETF, &bf) != 0) e(0);
2150 
2151 	uval = 1;
2152 	if (ioctl(fd, BIOCIMMEDIATE, &uval) != 0) e(0);
2153 
2154 	/* Bind to the loopback device. */
2155 	memset(&ifr, 0, sizeof(ifr));
2156 	strlcpy(ifr.ifr_name, LOOPBACK_IFNAME, sizeof(ifr.ifr_name));
2157 	if (ioctl(fd, BIOCSETIF, &ifr) != 0) e(0);
2158 
2159 	/*
2160 	 * If the loopback device's data link type is not DLT_RAW, our filter
2161 	 * and size calculations will not work.
2162 	 */
2163 	if (ioctl(fd, BIOCGDLT, &dlt) != 0) e(0);
2164 	if (dlt != DLT_RAW) e(0);
2165 
2166 	/* We use UDP traffic for our test packets. */
2167 	if ((fd2 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) e(0);
2168 
2169 	memset(&sin6A, 0, sizeof(sin6A));
2170 	sin6A.sin6_family = AF_INET6;
2171 	sin6A.sin6_port = htons(TEST_PORT_A);
2172 	memcpy(&sin6A.sin6_addr, &in6addr_loopback, sizeof(sin6A.sin6_addr));
2173 	if (bind(fd2, (struct sockaddr *)&sin6A, sizeof(sin6A)) != 0) e(0);
2174 
2175 	memcpy(&sin6B, &sin6A, sizeof(sin6B));
2176 	sin6B.sin6_port = htons(TEST_PORT_B);
2177 	if (connect(fd2, (struct sockaddr *)&sin6B, sizeof(sin6B)) != 0) e(0);
2178 
2179 	if ((fd3 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) e(0);
2180 
2181 	if (bind(fd3, (struct sockaddr *)&sin6B, sizeof(sin6B)) != 0) e(0);
2182 
2183 	if (connect(fd3, (struct sockaddr *)&sin6A, sizeof(sin6A)) != 0) e(0);
2184 
2185 	if (write(fd2, "A", 1) != 1) e(0);
2186 
2187 	if (read(fd3, &c, 1) != 1) e(0);
2188 	if (c != 'A') e(0);
2189 
2190 	if (write(fd3, "B", 1) != 1) e(0);
2191 
2192 	if (read(fd2, &c, 1) != 1) e(0);
2193 	if (c != 'B') e(0);
2194 
2195 	if (ioctl(fd, BIOCGSTATS, &bs) != 0) e(0);
2196 	if (bs.bs_recv < 2) e(0);
2197 	if (bs.bs_capt != 1) e(0);
2198 	if (bs.bs_drop != 0) e(0);
2199 
2200 	memset(buf, 0, size);
2201 
2202 	len = read(fd, buf, size);
2203 
2204 	if (len != BPF_WORDALIGN(sizeof(bh)) +
2205 	    BPF_WORDALIGN(sizeof(ip6) + sizeof(uh) + 1)) e(0);
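	/*
	 * That is, a word-aligned BPF header followed by the word-aligned
	 * capture: the 40-byte IPv6 header, the 8-byte UDP header, and one
	 * byte of payload, or 49 bytes in total before alignment.
	 */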
2206 
2207 	memcpy(&bh, buf, sizeof(bh));
2208 
2209 	if (bh.bh_tstamp.tv_sec == 0 && bh.bh_tstamp.tv_usec == 0) e(0);
2210 	if (bh.bh_caplen != sizeof(ip6) + sizeof(uh) + 1) e(0);
2211 	if (bh.bh_datalen != bh.bh_caplen) e(0);
2212 	if (bh.bh_hdrlen != BPF_WORDALIGN(sizeof(bh))) e(0);
2213 
2214 	if (buf[bh.bh_hdrlen + sizeof(ip6) + sizeof(uh)] != 'A') e(0);
2215 
2216 	/*
2217 	 * Finally, do a quick test to see if we can send IPv6 packets by
2218 	 * writing to the BPF device.  We rely on such packets being generated
2219 	 * properly in a later test.
2220 	 */
2221 	off = test94_make_pkt(buf, 6, 1 /*v6*/);
2222 	memcpy(buf + off, "Hello!", 6);
2223 
2224 	if (write(fd, buf, off + 6) != off + 6) e(0);
2225 
2226 	socklen = sizeof(sin6A);
2227 	if (recvfrom(fd3, buf, size, 0, (struct sockaddr *)&sin6A,
2228 	    &socklen) != 6) e(0);
2229 
2230 	if (memcmp(buf, "Hello!", 6) != 0) e(0);
2231 	if (socklen != sizeof(sin6A)) e(0);
2232 	if (sin6A.sin6_family != AF_INET6) e(0);
2233 	if (sin6A.sin6_port != htons(TEST_PORT_A)) e(0);
2234 	if (memcmp(&sin6A.sin6_addr, &in6addr_loopback,
2235 	    sizeof(sin6A.sin6_addr)) != 0) e(0);
2236 
2237 	free(buf);
2238 
2239 	if (close(fd3) != 0) e(0);
2240 
2241 	if (close(fd2) != 0) e(0);
2242 
2243 	if (close(fd) != 0) e(0);
2244 }
2245 
2246 /*
2247  * Test the BPF sysctl(7) interface at a basic level.
2248  */
2249 static void
2250 test94j(void)
2251 {
2252 	struct bpf_stat bs1, bs2;
2253 	struct bpf_d_ext *bde;
2254 	uint8_t *buf;
2255 	unsigned int slot, count, uval;
2256 	size_t len, oldlen, size, bdesize;
2257 	int fd, fd2, fd3, val, mib[5], smib[3], found;
2258 
2259 	subtest = 10;
2260 
2261 	/*
2262 	 * Obtain the maximum buffer size.  The value must be sane.
2263 	 */
2264 	memset(mib, 0, sizeof(mib));
2265 	len = __arraycount(mib);
2266 	if (sysctlnametomib("net.bpf.maxbufsize", mib, &len) != 0) e(0);
2267 	if (len != 3) e(0);
2268 
2269 	oldlen = sizeof(val);
2270 	if (sysctl(mib, len, &val, &oldlen, NULL, 0) != 0) e(0);
2271 	if (oldlen != sizeof(val)) e(0);
2272 
2273 	if (val < 1024 || val > INT_MAX / 2) e(0);
2274 
2275 	/*
2276 	 * Attempt to set the maximum buffer size.  This is not (yet) supported
2277 	 * so for now we want to make sure that it really does not work.
2278 	 */
2279 	if (sysctl(mib, len, NULL, NULL, &val, sizeof(val)) != -1) e(0);
2280 	if (errno != EPERM) e(0);
2281 
2282 	/*
2283 	 * Obtain global statistics.  We check the actual statistics later on.
2284 	 */
2285 	memset(smib, 0, sizeof(smib));
2286 	len = __arraycount(smib);
2287 	if (sysctlnametomib("net.bpf.stats", smib, &len) != 0) e(0);
2288 	if (len != 3) e(0);
2289 
2290 	oldlen = sizeof(bs1);
2291 	if (sysctl(smib, len, &bs1, &oldlen, NULL, 0) != 0) e(0);
2292 	if (oldlen != sizeof(bs1)) e(0);
2293 
2294 	/*
2295 	 * Set up a BPF descriptor, and retrieve the list of BPF peers.  We
2296 	 * should be able to find our BPF peer.
2297 	 */
2298 	memset(mib, 0, sizeof(mib));
2299 	len = __arraycount(mib);
2300 	if (sysctlnametomib("net.bpf.peers", mib, &len) != 0) e(0);
2301 	if (len != 3) e(0);
2302 	mib[len++] = sizeof(*bde);	/* size of each element */
2303 	mib[len++] = INT_MAX;		/* limit on elements to return */
2304 
2305 	size = test94_setup(&fd, &fd2, &fd3, &buf, 0 /*size*/,
2306 	    1 /*set_filter*/);
2307 
2308 	/* Generate some traffic to bump the statistics. */
2309 	count = test94_fill_exact(fd2, buf, size, 0);
2310 	test94_fill_exact(fd2, buf, size, 0);
2311 	test94_fill_exact(fd2, buf, size, 0);
2312 
2313 	if (write(fd3, "X", 1) != 1) e(0);
2314 
2315 	if (sysctl(mib, len, NULL, &oldlen, NULL, 0) != 0) e(0);
2316 	if (oldlen == 0) e(0);
2317 
2318 	/* Add some slack space ourselves to prevent problems with churn. */
2319 	bdesize = oldlen + sizeof(*bde) * 8;
2320 	if ((bde = malloc(bdesize)) == NULL) e(0);
2321 
2322 	oldlen = bdesize;
2323 	if (sysctl(mib, len, bde, &oldlen, NULL, 0) != 0) e(0);
2324 	if (oldlen % sizeof(*bde)) e(0);
2325 
2326 	found = 0;
2327 	for (slot = 0; slot < oldlen / sizeof(*bde); slot++) {
2328 		if (bde[slot].bde_pid != getpid())
2329 			continue;
2330 
2331 		if (bde[slot].bde_bufsize != size) e(0);
2332 		if (bde[slot].bde_promisc != 0) e(0);
2333 		if (bde[slot].bde_state != BPF_IDLE) e(0);
2334 		if (bde[slot].bde_immediate != 0) e(0);
2335 		if (bde[slot].bde_hdrcmplt != 0) e(0);
2336 		if (bde[slot].bde_seesent != 1) e(0);
2337 		if (bde[slot].bde_rcount < count * 3 + 1) e(0);
2338 		if (bde[slot].bde_dcount != count) e(0);
2339 		if (bde[slot].bde_ccount != count * 3) e(0);
2340 		if (strcmp(bde[slot].bde_ifname, LOOPBACK_IFNAME) != 0) e(0);
2341 
2342 		found++;
2343 	}
2344 	if (found != 1) e(0);
2345 
2346 	/*
2347 	 * If global statistics are an accumulation of individual devices'
2348 	 * statistics (they currently are not) then such a scheme should take
2349 	 * into account device flushes.
2350 	 */
2351 	if (ioctl(fd, BIOCFLUSH) != 0) e(0);
2352 
2353 	test94_cleanup(fd, fd2, fd3, buf);
2354 
2355 	/*
2356 	 * Now see if the global statistics have indeed changed correctly.
2357 	 */
2358 	oldlen = sizeof(bs2);
2359 	if (sysctl(smib, __arraycount(smib), &bs2, &oldlen, NULL, 0) != 0)
2360 		e(0);
2361 	if (oldlen != sizeof(bs2)) e(0);
2362 
2363 	if (bs2.bs_recv < bs1.bs_recv + count * 3 + 1) e(0);
2364 	if (bs2.bs_drop != bs1.bs_drop + count) e(0);
2365 	if (bs2.bs_capt != bs1.bs_capt + count * 3) e(0);
2366 
2367 	/*
2368 	 * Check an unconfigured BPF device as well.
2369 	 */
2370 	if ((fd = open(_PATH_BPF, O_RDWR)) < 0) e(0);
2371 
2372 	/*
2373 	 * Toggle some flags.  It is too much effort to test them all
2374 	 * individually (which, in the light of copy-paste mistakes, would be
2375 	 * the right thing to do) but at least we'll know something gets set.
2376 	 */
2377 	uval = 1;
2378 	if (ioctl(fd, BIOCIMMEDIATE, &uval) != 0) e(0);
2379 	if (ioctl(fd, BIOCSHDRCMPLT, &uval) != 0) e(0);
2380 
2381 	uval = 0;
2382 	if (ioctl(fd, BIOCSSEESENT, &uval) != 0) e(0);
2383 
2384 	oldlen = bdesize;
2385 	if (sysctl(mib, len, bde, &oldlen, NULL, 0) != 0) e(0);
2386 	if (oldlen % sizeof(*bde)) e(0);
2387 
2388 	found = 0;
2389 	for (slot = 0; slot < oldlen / sizeof(*bde); slot++) {
2390 		if (bde[slot].bde_pid != getpid())
2391 			continue;
2392 
2393 		if (bde[slot].bde_bufsize != size) e(0);
2394 		if (bde[slot].bde_promisc != 0) e(0);
2395 		if (bde[slot].bde_state != BPF_IDLE) e(0);
2396 		if (bde[slot].bde_immediate != 1) e(0);
2397 		if (bde[slot].bde_hdrcmplt != 1) e(0);
2398 		if (bde[slot].bde_seesent != 0) e(0);
2399 		if (bde[slot].bde_rcount != 0) e(0);
2400 		if (bde[slot].bde_dcount != 0) e(0);
2401 		if (bde[slot].bde_ccount != 0) e(0);
2402 		if (bde[slot].bde_ifname[0] != '\0') e(0);
2403 
2404 		found++;
2405 	}
2406 	if (found != 1) e(0);
2407 
2408 	if (close(fd) != 0) e(0);
2409 
2410 	/*
2411 	 * At this point there should be no BPF device left for our PID.
2412 	 */
2413 	oldlen = bdesize;
2414 	if (sysctl(mib, len, bde, &oldlen, NULL, 0) != 0) e(0);
2415 	if (oldlen % sizeof(*bde)) e(0);
2416 
2417 	for (slot = 0; slot < oldlen / sizeof(*bde); slot++)
2418 		if (bde[slot].bde_pid == getpid()) e(0);
2420 
2421 	free(bde);
2422 }
2423 
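/*
 * For reference only: a minimal sketch, not used by the test itself, of the
 * net.bpf.peers query pattern exercised above.  It returns the number of BPF
 * descriptors open system-wide, or -1 on failure; the slack of eight extra
 * elements is an illustrative assumption borrowed from the test.
 */
static int __unused
example_count_bpf_peers(void)
{
	struct bpf_d_ext *bde;
	size_t len, oldlen;
	int mib[5], count;

	len = __arraycount(mib);
	if (sysctlnametomib("net.bpf.peers", mib, &len) != 0) return -1;
	mib[len++] = sizeof(*bde);	/* size of each element */
	mib[len++] = INT_MAX;		/* limit on elements to return */

	/* First query the data size, then retrieve with slack for churn. */
	if (sysctl(mib, len, NULL, &oldlen, NULL, 0) != 0) return -1;
	oldlen += sizeof(*bde) * 8;
	if ((bde = malloc(oldlen)) == NULL) return -1;
	if (sysctl(mib, len, bde, &oldlen, NULL, 0) != 0) {
		free(bde);
		return -1;
	}
	count = (int)(oldlen / sizeof(*bde));
	free(bde);
	return count;
}
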
2424 /*
2425  * Test privileged operations as an unprivileged caller.
2426  */
2427 static void
2428 test94k(void)
2429 {
2430 	struct passwd *pw;
2431 	pid_t pid;
2432 	size_t len, oldlen;
2433 	int mib[5], status;
2434 
2435 	subtest = 11;
2436 
2437 	pid = fork();
2438 	switch (pid) {
2439 	case 0:
2440 		errct = 0;
2441 
2442 		if ((pw = getpwnam(NONROOT_USER)) == NULL) e(0);
2443 
2444 		if (setuid(pw->pw_uid) != 0) e(0);
2445 
2446 		/*
2447 		 * Opening /dev/bpf must fail.  Note that this is a system
2448 		 * configuration issue rather than a LWIP service issue.
2449 		 */
2450 		if (open(_PATH_BPF, O_RDWR) != -1) e(0);
2451 		if (errno != EACCES) e(0);
2452 
2453 		/*
2454 		 * Retrieving the net.bpf.peers list must fail, too.
2455 		 */
2456 		memset(mib, 0, sizeof(mib));
2457 		len = __arraycount(mib);
2458 		if (sysctlnametomib("net.bpf.peers", mib, &len) != 0) e(0);
2459 		if (len != 3) e(0);
2460 		mib[len++] = sizeof(struct bpf_d_ext);
2461 		mib[len++] = INT_MAX;
2462 
2463 		if (sysctl(mib, len, NULL, &oldlen, NULL, 0) != -1) e(0);
2464 		if (errno != EPERM) e(0);
2465 
2466 		exit(errct);
2467 	case -1:
2468 		e(0);
2469 
2470 		break;
2471 	default:
2472 		break;
2473 	}
2474 
2475 	if (wait(&status) != pid) e(0);
2476 	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) e(0);
2477 }
2478 
2479 /*
2480  * Test that traffic directed to loopback addresses is dropped on non-loopback
2481  * interfaces.  In particular, inbound traffic to 127.0.0.1 and ::1 should not
2482  * be accepted on any interface that does not own those addresses.  This test
2483  * is here because BPF feedback mode is (currently) the only way in which we
2484  * can generate inbound traffic at the ethernet level, and even then only as a
2485  * side effect of sending outbound traffic.  That is: this test sends its
2486  * test packets onto the local network!  As such it must be performed only when
2487  * USENETWORK=yes and therefore at the user's risk.
2488  */
2489 static void
2490 test94l(void)
2491 {
2492 	struct sockaddr_in sin;
2493 	struct sockaddr_in6 sin6;
2494 	struct sockaddr_dl sdl;
2495 	struct ifreq ifr;
2496 	struct ifaddrs *ifa, *ifp;
2497 	struct if_data *ifdata;
2498 	uint8_t buf[sizeof(struct ether_header) + MAX(sizeof(struct ip),
2499 	    sizeof(struct ip6_hdr)) + sizeof(struct udphdr) + 6];
2500 	struct ether_header ether;
2501 	const uint8_t ether_src[ETHER_ADDR_LEN] =
2502 	    { 0x02, 0x00, 0x01, 0x12, 0x34, 0x56 };
2503 	unsigned int val;
2504 	size_t off;
2505 	int bfd, sfd;
2506 
2507 	subtest = 12;
2508 
2509 	if (!get_setting_use_network())
2510 		return;
2511 
2512 	memset(&ifr, 0, sizeof(ifr));
2513 	memset(&ether, 0, sizeof(ether));
2514 
2515 	/*
2516 	 * Start by finding a suitable ethernet interface that is up and whose
2517 	 * link is not down.  Without one, we cannot perform this
2518 	 * test.  Save the interface name and the ethernet address.
2519 	 */
2520 	if (getifaddrs(&ifa) != 0) e(0);
2521 
2522 	for (ifp = ifa; ifp != NULL; ifp = ifp->ifa_next) {
2523 		if (!(ifp->ifa_flags & IFF_UP) || ifp->ifa_addr == NULL ||
2524 		    ifp->ifa_addr->sa_family != AF_LINK)
2525 			continue;
2526 
2527 		ifdata = (struct if_data *)ifp->ifa_data;
2528 		if (ifdata != NULL && ifdata->ifi_type == IFT_ETHER &&
2529 		    ifdata->ifi_link_state != LINK_STATE_DOWN) {
2530 			strlcpy(ifr.ifr_name, ifp->ifa_name,
2531 			    sizeof(ifr.ifr_name));
2532 
2533 			memcpy(&sdl, (struct sockaddr_dl *)ifp->ifa_addr,
2534 			    offsetof(struct sockaddr_dl, sdl_data));
2535 			if (sdl.sdl_alen != sizeof(ether.ether_dhost)) e(0);
2536 			memcpy(ether.ether_dhost,
2537 			    ((struct sockaddr_dl *)ifp->ifa_addr)->sdl_data +
2538 			    sdl.sdl_nlen, sdl.sdl_alen);
2539 			break;
2540 		}
2541 	}
2542 
2543 	freeifaddrs(ifa);
2544 
2545 	if (ifp == NULL)
2546 		return;
2547 
2548 	/* Open a BPF device and bind it to the ethernet interface we found. */
2549 	if ((bfd = open(_PATH_BPF, O_RDWR)) < 0) e(0);
2550 
2551 	if (ioctl(bfd, BIOCSETIF, &ifr) != 0) e(0);
2552 
2553 	if (ioctl(bfd, BIOCGDLT, &val) != 0) e(0);
2554 	if (val != DLT_EN10MB) e(0);
2555 
2556 	val = 1;
2557 	if (ioctl(bfd, BIOCSFEEDBACK, &val) != 0) e(0);
2558 
2559 	/* We use UDP traffic for our test packets, IPv4 first. */
2560 	if ((sfd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) e(0);
2561 
2562 	memset(&sin, 0, sizeof(sin));
2563 	sin.sin_family = AF_INET;
2564 	sin.sin_port = htons(TEST_PORT_B);
2565 	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
2566 	if (bind(sfd, (struct sockaddr *)&sin, sizeof(sin)) != 0) e(0);
2567 
2568 	/*
2569 	 * Construct and send a packet.  We already filled in the ethernet
2570 	 * destination address.  Put in a source address that is locally
2571 	 * administered but valid (and as such no reason for packet rejection).
2572 	 */
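	/* The 0x02 in ether_src's first octet is the locally-administered bit. */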
2573 	memcpy(ether.ether_shost, ether_src, sizeof(ether.ether_shost));
2574 	ether.ether_type = htons(ETHERTYPE_IP);
2575 
2576 	memcpy(buf, &ether, sizeof(ether));
2577 	off = sizeof(ether);
2578 	off += test94_make_pkt(buf + off, 6, 0 /*v6*/);
2579 	if (off + 6 > sizeof(buf)) e(0);
2580 	memcpy(buf + off, "Hello!", 6);
2581 
2582 	if (write(bfd, buf, off + 6) != off + 6) e(0);
2583 
2584 	/* The packet MUST NOT arrive. */
2585 	if (recv(sfd, buf, sizeof(buf), MSG_DONTWAIT) != -1) e(0);
2586 	if (errno != EWOULDBLOCK) e(0);
2587 
2588 	if (close(sfd) != 0) e(0);
2589 
2590 	/* Try the same thing, but now with an IPv6 packet. */
2591 	if ((sfd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) e(0);
2592 
2593 	memset(&sin6, 0, sizeof(sin6));
2594 	sin6.sin6_family = AF_INET6;
2595 	sin6.sin6_port = htons(TEST_PORT_B);
2596 	memcpy(&sin6.sin6_addr, &in6addr_loopback, sizeof(sin6.sin6_addr));
2597 	if (bind(sfd, (struct sockaddr *)&sin6, sizeof(sin6)) != 0) e(0);
2598 
2599 	ether.ether_type = htons(ETHERTYPE_IPV6);
2600 
2601 	memcpy(buf, &ether, sizeof(ether));
2602 	off = sizeof(ether);
2603 	off += test94_make_pkt(buf + off, 6, 1 /*v6*/);
2604 	if (off + 6 > sizeof(buf)) e(0);
2605 	memcpy(buf + off, "Hello!", 6);
2606 
2607 	if (write(bfd, buf, off + 6) != off + 6) e(0);
2608 
2609 	if (recv(sfd, buf, sizeof(buf), MSG_DONTWAIT) != -1) e(0);
2610 	if (errno != EWOULDBLOCK) e(0);
2611 
2612 	if (close(sfd) != 0) e(0);
2613 	if (close(bfd) != 0) e(0);
2614 }
2615 
2616 /*
2617  * Test program for LWIP BPF.
2618  */
2619 int
2620 main(int argc, char ** argv)
2621 {
2622 	int i, m;
2623 
2624 	start(94);
2625 
2626 	srand48(time(NULL));
2627 
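	/*
	 * The optional argument is a bitmask selecting the subtests to run.
	 * Note that it is parsed with atoi(3) and must therefore be given in
	 * decimal form: for example, 256 rather than 0x100 for test94i.
	 */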
2628 	if (argc == 2)
2629 		m = atoi(argv[1]);
2630 	else
2631 		m = 0xFFF;
2632 
2633 	for (i = 0; i < ITERATIONS; i++) {
2634 		if (m & 0x001) test94a();
2635 		if (m & 0x002) test94b();
2636 		if (m & 0x004) test94c();
2637 		if (m & 0x008) test94d();
2638 		if (m & 0x010) test94e();
2639 		if (m & 0x020) test94f();
2640 		if (m & 0x040) test94g();
2641 		if (m & 0x080) test94h();
2642 		if (m & 0x100) test94i();
2643 		if (m & 0x200) test94j();
2644 		if (m & 0x400) test94k();
2645 		if (m & 0x800) test94l();
2646 	}
2647 
2648 	quit();
2649 	/* NOTREACHED */
2650 }
2651