1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/stropts.h>
31 #include <sys/socket.h>
32 #include <sys/socketvar.h>
33 #include <sys/socket_proto.h>
34 #include <sys/sockio.h>
35 #include <sys/strsun.h>
36 #include <sys/kstat.h>
37 #include <sys/modctl.h>
38 #include <sys/policy.h>
39 #include <sys/priv_const.h>
40 #include <sys/tihdr.h>
41 #include <sys/zone.h>
42 #include <sys/time.h>
43 #include <fs/sockfs/sockcommon.h>
44 #include <net/if.h>
45 
46 #include <sys/dls.h>
47 #include <sys/mac.h>
48 #include <sys/mac_client.h>
49 #include <sys/mac_provider.h>
50 #include <sys/mac_client_priv.h>
51 
52 #include <netpacket/packet.h>
53 
/*
 * Forward declarations of the module-private pfp_* helpers. These are the
 * internal workers behind the sockfs downcalls declared further below:
 * opening/closing mac handles, the mac receive callback, and the
 * per-level socket option and ioctl support routines.
 */
static void pfp_close(mac_handle_t, mac_client_handle_t);
static int pfp_dl_to_arphrd(int);
static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
    socklen_t *);
static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *);
static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *);
static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
    cred_t *);
/* Receive callback handed to mac_promisc_add() by sdpfp_bind(). */
static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
static void pfp_release_bpf(struct pfpsock *);
static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);
static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
    socklen_t);
69 
/*
 * PFP sockfs operations
 * Most are currently no-ops because they have no meaning for a connectionless
 * socket.
 *
 * The sdpfp_* functions declared here are the ones installed in the
 * pfp_downcalls table; every other slot of that table is filled with a
 * generic sock_*_notsupp handler (or NULL).
 */
static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
    sock_upcalls_t *, int, struct cred *);
static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
    struct cred *);
static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
static void sdpfp_clr_flowctrl(sock_lower_handle_t);
static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
    socklen_t *, struct cred *);
static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
    struct cred *);
static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
    struct cred *);
static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
    socklen_t, struct cred *);

/* Socket creation entry point; registered with sockfs through sinfo. */
static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
    uint_t *, int *, int, cred_t *);

/* Module-wide setup and teardown, driven from _init() and _fini(). */
static int sockpfp_init(void);
static void sockpfp_fini(void);
95 
/*
 * Global statistics.
 *
 * pfp_ksp is the kstat handle obtained in sockpfp_init(); it stays NULL if
 * the kstat could not be created. ks_stats holds the live counters that the
 * packet paths increment and that the kstat exports as its data. pfp_kstats
 * is the template (names and data types only) that sockpfp_init() copies
 * into ks_stats.
 */
static kstat_t *pfp_ksp;
static pfp_kstats_t ks_stats;
static pfp_kstats_t pfp_kstats = {
	/*
	 * Each one of these kstats is a different return path in handling
	 * a packet received from the mac layer.
	 */
	{ "recvMacHeaderFail",	KSTAT_DATA_UINT64 },
	{ "recvBadProtocol",	KSTAT_DATA_UINT64 },
	{ "recvAllocbFail",	KSTAT_DATA_UINT64 },
	{ "recvOk",		KSTAT_DATA_UINT64 },
	{ "recvFail",		KSTAT_DATA_UINT64 },
	{ "recvFiltered",	KSTAT_DATA_UINT64 },
	{ "recvFlowControl",	KSTAT_DATA_UINT64 },
	/*
	 * A global set of counters is maintained to track the behaviour
	 * of the system (kernel & applications) in sending packets.
	 */
	{ "sendUnbound",	KSTAT_DATA_UINT64 },
	{ "sendFailed",		KSTAT_DATA_UINT64 },
	{ "sendTooBig",		KSTAT_DATA_UINT64 },
	{ "sendAllocFail",	KSTAT_DATA_UINT64 },
	{ "sendUiomoveFail",	KSTAT_DATA_UINT64 },
	{ "sendNoMemory",	KSTAT_DATA_UINT64 },
	{ "sendOpenFail",	KSTAT_DATA_UINT64 },
	{ "sendWrongFamily",	KSTAT_DATA_UINT64 },
	{ "sendShortMsg",	KSTAT_DATA_UINT64 },
	{ "sendOk",		KSTAT_DATA_UINT64 }
};
125 
/*
 * Downcall table handed back to sockfs by sockpfp_create(). The initializer
 * is positional, so the order of entries must follow the layout of
 * sock_downcalls_t. Operations that make no sense for PF_PACKET use the
 * generic sock_*_notsupp handlers.
 */
sock_downcalls_t pfp_downcalls = {
	sdpfp_activate,
	sock_accept_notsupp,
	sdpfp_bind,
	sock_listen_notsupp,
	sock_connect_notsupp,
	sock_getpeername_notsupp,
	sock_getsockname_notsupp,
	sdpfp_getsockopt,
	sdpfp_setsockopt,
	sock_send_notsupp,
	sdpfp_senduio,
	NULL,	/* NOTE(review): presumably the receive-uio slot - confirm */
	sock_poll_notsupp,
	sock_shutdown_notsupp,
	sdpfp_clr_flowctrl,
	sdpfp_ioctl,
	sdpfp_close,
};
145 
/*
 * Socket module registration: names the module "sockpfp", states the
 * upcall/downcall interface versions it was built against and supplies
 * sockpfp_create() as the function used to create new sockets.
 */
static smod_reg_t sinfo = {
	SOCKMOD_VERSION,
	"sockpfp",
	SOCK_UC_VERSION,
	SOCK_DC_VERSION,
	sockpfp_create,
	NULL
};

/*
 * Module linkage information for the kernel.
 */
static struct modlsockmod modlsockmod = {
	&mod_sockmodops, "PF Packet socket module", &sinfo
};

/* Passed to mod_install(), mod_remove() and mod_info() below. */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsockmod,
	NULL
};
167 
168 int
169 _init(void)
170 {
171 	int error;
172 
173 	error = sockpfp_init();
174 	if (error != 0)
175 		return (error);
176 
177 	error = mod_install(&modlinkage);
178 	if (error != 0)
179 		sockpfp_fini();
180 
181 	return (error);
182 }
183 
184 int
185 _fini(void)
186 {
187 	int error;
188 
189 	error = mod_remove(&modlinkage);
190 	if (error == 0)
191 		sockpfp_fini();
192 
193 	return (error);
194 }
195 
196 int
197 _info(struct modinfo *modinfop)
198 {
199 	return (mod_info(&modlinkage, modinfop));
200 }
201 
202 /*
203  * sockpfp_init: called as part of the initialisation of the module when
204  * loaded into the kernel.
205  *
206  * Being able to create and record the kstats data in the kernel is not
207  * considered to be vital to the operation of this kernel module, thus
208  * its failure is tolerated.
209  */
210 static int
211 sockpfp_init(void)
212 {
213 	(void) memset(&ks_stats, 0, sizeof (ks_stats));
214 
215 	(void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
216 
217 	pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
218 	    KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
219 	    KSTAT_FLAG_VIRTUAL);
220 	if (pfp_ksp != NULL) {
221 		pfp_ksp->ks_data = &ks_stats;
222 		kstat_install(pfp_ksp);
223 	}
224 
225 	return (0);
226 }
227 
228 /*
229  * sockpfp_fini: called when the operating system wants to unload the
230  * socket module from the kernel.
231  */
232 static void
233 sockpfp_fini(void)
234 {
235 	if (pfp_ksp != NULL)
236 		kstat_delete(pfp_ksp);
237 }
238 
239 /*
240  * Due to sockets being created read-write by default, all PF_PACKET sockets
241  * therefore require the NET_RAWACCESS priviliege, even if the socket is only
242  * being used for reading packets from.
243  *
244  * This create function enforces this module only being used with PF_PACKET
245  * sockets and the policy that we support via the sock2path.conf file:
246  * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
247  */
248 /* ARGSUSED */
249 static sock_lower_handle_t
250 sockpfp_create(int family, int type, int proto,
251     sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
252     int sflags, cred_t *cred)
253 {
254 	struct pfpsock *ps;
255 	int kmflags;
256 
257 	if (secpolicy_net_rawaccess(cred) != 0) {
258 		*errorp = EACCES;
259 		return (NULL);
260 	}
261 
262 	if (family != AF_PACKET) {
263 		*errorp = EAFNOSUPPORT;
264 		return (NULL);
265 	}
266 
267 	if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
268 		*errorp = ESOCKTNOSUPPORT;
269 		return (NULL);
270 	}
271 
272 	kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
273 	ps = kmem_zalloc(sizeof (*ps), kmflags);
274 	if (ps == NULL) {
275 		*errorp = ENOMEM;
276 		return (NULL);
277 	}
278 
279 	ps->ps_type = type;
280 	ps->ps_proto = proto;
281 	rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
282 	mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
283 
284 	*sock_downcalls = &pfp_downcalls;
285 	/*
286 	 * Setting this causes bytes from a packet that do not fit into the
287 	 * destination user buffer to be discarded. Thus the API is one
288 	 * packet per receive and callers are required to use a buffer large
289 	 * enough for the biggest packet that the interface can provide.
290 	 */
291 	*smodep = SM_ATOMIC;
292 
293 	return ((sock_lower_handle_t)ps);
294 }
295 
296 /* ************************************************************************* */
297 
298 /*
299  * pfp_packet is the callback function that is given to the mac layer for
300  * PF_PACKET to receive packets with. One packet at a time is passed into
301  * this function from the mac layer. Each packet is a private copy given
302  * to PF_PACKET to modify or free as it wishes and does not harm the original
303  * packet from which it was cloned.
304  */
305 /* ARGSUSED */
306 static void
307 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
308 {
309 	struct T_unitdata_ind *tunit;
310 	struct sockaddr_ll *sll;
311 	struct sockaddr_ll *sol;
312 	mac_header_info_t hdr;
313 	struct pfpsock *ps;
314 	size_t tusz;
315 	mblk_t *mp0;
316 	int error;
317 
318 	if (mp == NULL)
319 		return;
320 
321 	ps = arg;
322 	if (ps->ps_flow_ctrld) {
323 		ps->ps_flow_ctrl_drops++;
324 		ps->ps_stats.tp_drops++;
325 		ks_stats.kp_recv_flow_cntrld.value.ui64++;
326 		freemsg(mp);
327 		return;
328 	}
329 
330 	if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
331 		/*
332 		 * Can't decode the packet header information so drop it.
333 		 */
334 		ps->ps_stats.tp_drops++;
335 		ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
336 		freemsg(mp);
337 		return;
338 	}
339 
340 	if (mac_type(ps->ps_mh) == DL_ETHER &&
341 	    hdr.mhi_bindsap == ETHERTYPE_VLAN) {
342 		struct ether_vlan_header *evhp;
343 		struct ether_vlan_header evh;
344 
345 		hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
346 		hdr.mhi_istagged = B_TRUE;
347 
348 		if (MBLKL(mp) >= sizeof (*evhp)) {
349 			evhp = (struct ether_vlan_header *)mp->b_rptr;
350 		} else {
351 			int sz = sizeof (*evhp);
352 			char *s = (char *)&evh;
353 			mblk_t *tmp;
354 			int len;
355 
356 			for (tmp = mp; sz > 0 && tmp != NULL;
357 			    tmp = tmp->b_cont) {
358 				len = min(sz, MBLKL(tmp));
359 				bcopy(tmp->b_rptr, s, len);
360 				sz -= len;
361 			}
362 			evhp = &evh;
363 		}
364 		hdr.mhi_tci = ntohs(evhp->ether_tci);
365 		hdr.mhi_bindsap = ntohs(evhp->ether_type);
366 	}
367 
368 	if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
369 		/*
370 		 * The packet is not of interest to this socket so
371 		 * drop it on the floor. Here the SAP is being used
372 		 * as a very course filter.
373 		 */
374 		ps->ps_stats.tp_drops++;
375 		ks_stats.kp_recv_bad_proto.value.ui64++;
376 		freemsg(mp);
377 		return;
378 	}
379 
380 	/*
381 	 * This field is not often set, even for ethernet,
382 	 * by mac_header_info, so compute it if it is 0.
383 	 */
384 	if (hdr.mhi_pktsize == 0)
385 		hdr.mhi_pktsize = msgdsize(mp);
386 
387 	/*
388 	 * If a BPF filter is present, pass the raw packet into that.
389 	 * A failed match will result in zero being returned, indicating
390 	 * that this socket is not interested in the packet.
391 	 */
392 	if (ps->ps_bpf.bf_len != 0) {
393 		uchar_t *buffer;
394 		int buflen;
395 
396 		buflen = MBLKL(mp);
397 		if (hdr.mhi_pktsize == buflen) {
398 			buffer = mp->b_rptr;
399 		} else {
400 			buflen = 0;
401 			buffer = (uchar_t *)mp;
402 		}
403 		rw_enter(&ps->ps_bpflock, RW_READER);
404 		if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
405 		    hdr.mhi_pktsize, buflen) == 0) {
406 			rw_exit(&ps->ps_bpflock);
407 			ps->ps_stats.tp_drops++;
408 			ks_stats.kp_recv_filtered.value.ui64++;
409 			freemsg(mp);
410 			return;
411 		}
412 		rw_exit(&ps->ps_bpflock);
413 	}
414 
415 	if (ps->ps_type == SOCK_DGRAM) {
416 		/*
417 		 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
418 		 * past the link layer header.
419 		 */
420 		mp->b_rptr += hdr.mhi_hdrsize;
421 		hdr.mhi_pktsize -= hdr.mhi_hdrsize;
422 	}
423 
424 	tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
425 	if (ps->ps_auxdata) {
426 		tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
427 		tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
428 	}
429 
430 	/*
431 	 * It is tempting to think that this could be optimised by having
432 	 * the base mblk_t allocated and hung off the pfpsock structure,
433 	 * except that then another one would need to be allocated for the
434 	 * sockaddr_ll that is included. Even creating a template to copy
435 	 * from is of questionable value, as read-write from one structure
436 	 * to the other is going to be slower than all of the initialisation.
437 	 */
438 	mp0 = allocb(tusz, BPRI_HI);
439 	if (mp0 == NULL) {
440 		ps->ps_stats.tp_drops++;
441 		ks_stats.kp_recv_alloc_fail.value.ui64++;
442 		freemsg(mp);
443 		return;
444 	}
445 
446 	(void) memset(mp0->b_rptr, 0, tusz);
447 
448 	mp0->b_datap->db_type = M_PROTO;
449 	mp0->b_wptr = mp0->b_rptr + tusz;
450 
451 	tunit = (struct T_unitdata_ind *)mp0->b_rptr;
452 	tunit->PRIM_type = T_UNITDATA_IND;
453 	tunit->SRC_length = sizeof (struct sockaddr);
454 	tunit->SRC_offset = sizeof (*tunit);
455 
456 	sol = (struct sockaddr_ll *)&ps->ps_sock;
457 	sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
458 	sll->sll_ifindex = sol->sll_ifindex;
459 	sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
460 	sll->sll_halen = sol->sll_halen;
461 	if (hdr.mhi_saddr != NULL)
462 		(void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
463 
464 	switch (hdr.mhi_dsttype) {
465 	case MAC_ADDRTYPE_MULTICAST :
466 		sll->sll_pkttype = PACKET_MULTICAST;
467 		break;
468 	case MAC_ADDRTYPE_BROADCAST :
469 		sll->sll_pkttype = PACKET_BROADCAST;
470 		break;
471 	case MAC_ADDRTYPE_UNICAST :
472 		if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
473 			sll->sll_pkttype = PACKET_HOST;
474 		else
475 			sll->sll_pkttype = PACKET_OTHERHOST;
476 		break;
477 	}
478 
479 	if (ps->ps_auxdata) {
480 		struct tpacket_auxdata *aux;
481 		struct T_opthdr *topt;
482 
483 		tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
484 		    sizeof (struct sockaddr_ll));
485 		tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
486 		    _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
487 
488 		topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
489 		aux = (struct tpacket_auxdata *)
490 		    ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
491 
492 		topt->len = tunit->OPT_length;
493 		topt->level = SOL_PACKET;
494 		topt->name = PACKET_AUXDATA;
495 		topt->status = 0;
496 		/*
497 		 * libpcap doesn't seem to use any other field,
498 		 * so it isn't clear how they should be filled in.
499 		 */
500 		aux->tp_vlan_vci = hdr.mhi_tci;
501 	}
502 
503 	linkb(mp0, mp);
504 
505 	ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
506 	    &error, NULL);
507 
508 	if (error == 0) {
509 		ps->ps_stats.tp_packets++;
510 		ks_stats.kp_recv_ok.value.ui64++;
511 	} else {
512 		mutex_enter(&ps->ps_lock);
513 		if (error == ENOSPC) {
514 			ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
515 			    &error, NULL);
516 			if (error == ENOSPC)
517 				ps->ps_flow_ctrld = B_TRUE;
518 		}
519 		mutex_exit(&ps->ps_lock);
520 		ps->ps_stats.tp_drops++;
521 		ks_stats.kp_recv_fail.value.ui64++;
522 	}
523 }
524 
525 /*
526  * Bind a PF_PACKET socket to a network interface.
527  *
528  * The default operation of this bind() is to place the socket (and thus the
529  * network interface) into promiscuous mode. It is then up to the application
530  * to turn that down by issuing the relevant ioctls, if desired.
531  */
532 /* ARGSUSED */
533 static int
534 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
535     socklen_t addrlen, struct cred *cred)
536 {
537 	struct sockaddr_ll *addr_ll, *sol;
538 	mac_client_handle_t mch;
539 	struct pfpsock *ps;
540 	mac_handle_t mh;
541 	int error;
542 
543 	ps = (struct pfpsock *)handle;
544 	if (ps->ps_bound)
545 		return (EINVAL);
546 
547 	addr_ll = (struct sockaddr_ll *)addr;
548 
549 	error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
550 	if (error != 0)
551 		return (error);
552 	/*
553 	 * Ensure that each socket is only bound once.
554 	 */
555 	mutex_enter(&ps->ps_lock);
556 	if (ps->ps_mh != 0) {
557 		mutex_exit(&ps->ps_lock);
558 		pfp_close(mh, mch);
559 		return (EADDRINUSE);
560 	}
561 	ps->ps_mh = mh;
562 	ps->ps_mch = mch;
563 	mutex_exit(&ps->ps_lock);
564 
565 	/*
566 	 * Cache all of the information from bind so that it's in an easy
567 	 * place to get at when packets are received.
568 	 */
569 	sol = (struct sockaddr_ll *)&ps->ps_sock;
570 	sol->sll_family = AF_PACKET;
571 	sol->sll_ifindex = addr_ll->sll_ifindex;
572 	sol->sll_protocol = addr_ll->sll_protocol;
573 	sol->sll_halen = mac_addr_len(ps->ps_mh);
574 	mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
575 	mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
576 	ps->ps_linkid = addr_ll->sll_ifindex;
577 
578 	error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
579 	    pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
580 	if (error == 0) {
581 		ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
582 		ps->ps_bound = B_TRUE;
583 	}
584 
585 	return (error);
586 }
587 
588 /* ARGSUSED */
589 static void
590 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
591     sock_upcalls_t *upcalls, int flags, cred_t *cred)
592 {
593 	struct pfpsock *ps;
594 
595 	ps = (struct pfpsock *)lower;
596 	ps->ps_upper = upper;
597 	ps->ps_upcalls = upcalls;
598 }
599 
600 /*
601  * This module only implements getting socket options for the new socket
602  * option level (SOL_PACKET) that it introduces. All other requests are
603  * passed back to the sockfs layer.
604  */
605 /* ARGSUSED */
606 static int
607 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
608     void *optval, socklen_t *optlenp, struct cred *cred)
609 {
610 	int error = 0;
611 
612 	switch (level) {
613 	case SOL_PACKET :
614 		error = pfp_getpacket_sockopt(handle, option_name, optval,
615 		    optlenp);
616 		break;
617 	default :
618 		/*
619 		 * If sockfs code receives this error in return from the
620 		 * getsockopt downcall it handles the option locally, if
621 		 * it can. This implements SO_RCVBUF, etc.
622 		 */
623 		error = ENOPROTOOPT;
624 		break;
625 	}
626 
627 	return (error);
628 }
629 
630 /*
631  * PF_PACKET supports setting socket options at only two levels:
632  * SOL_SOCKET and SOL_PACKET.
633  */
634 /* ARGSUSED */
635 static int
636 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
637     const void *optval, socklen_t optlen, struct cred *cred)
638 {
639 	int error = 0;
640 
641 	switch (level) {
642 	case SOL_SOCKET :
643 		error = pfp_setsocket_sockopt(handle, option_name, optval,
644 		    optlen);
645 		break;
646 	case SOL_PACKET :
647 		error = pfp_setpacket_sockopt(handle, option_name, optval,
648 		    optlen);
649 		break;
650 	default :
651 		error = EINVAL;
652 		break;
653 	}
654 
655 	return (error);
656 }
657 
658 /*
659  * This function is incredibly inefficient for sending any packet that
660  * comes with a msghdr asking to be sent to an interface to which the
661  * socket has not been bound. Some possibilities here are keeping a
662  * cache of all open mac's and mac_client's, for the purpose of sending,
663  * and closing them after some amount of inactivity. Clearly, applications
664  * should not be written to use one socket for multiple interfaces if
665  * performance is desired with the code as is.
666  */
667 /* ARGSUSED */
668 static int
669 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
670     struct nmsghdr *msg, struct cred *cred)
671 {
672 	struct sockaddr_ll *sol;
673 	mac_client_handle_t mch;
674 	struct pfpsock *ps;
675 	boolean_t new_open;
676 	mac_handle_t mh;
677 	size_t mpsize;
678 	uint_t maxsdu;
679 	mblk_t *mp0;
680 	mblk_t *mp;
681 	int error;
682 
683 	mp = NULL;
684 	mp0 = NULL;
685 	new_open = B_FALSE;
686 	ps = (struct pfpsock *)handle;
687 	mh = ps->ps_mh;
688 	mch = ps->ps_mch;
689 	maxsdu = ps->ps_max_sdu;
690 
691 	sol = (struct sockaddr_ll *)msg->msg_name;
692 	if (sol == NULL) {
693 		/*
694 		 * If no sockaddr_ll has been provided with the send call,
695 		 * use the one constructed when the socket was bound to an
696 		 * interface and fail if it hasn't been bound.
697 		 */
698 		if (!ps->ps_bound) {
699 			ks_stats.kp_send_unbound.value.ui64++;
700 			return (EPROTO);
701 		}
702 		sol = (struct sockaddr_ll *)&ps->ps_sock;
703 	} else {
704 		/*
705 		 * Verify the sockaddr_ll message passed down before using
706 		 * it to send a packet out with. If it refers to an interface
707 		 * that has not been bound, it is necessary to open it.
708 		 */
709 		struct sockaddr_ll *sll;
710 
711 		if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
712 			ks_stats.kp_send_short_msg.value.ui64++;
713 			return (EINVAL);
714 		}
715 
716 		if (sol->sll_family != AF_PACKET) {
717 			ks_stats.kp_send_wrong_family.value.ui64++;
718 			return (EAFNOSUPPORT);
719 		}
720 
721 		sll = (struct sockaddr_ll *)&ps->ps_sock;
722 		if (sol->sll_ifindex != sll->sll_ifindex) {
723 			error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
724 			    cred);
725 			if (error != 0) {
726 				ks_stats.kp_send_open_fail.value.ui64++;
727 				return (error);
728 			}
729 			mac_sdu_get(mh, NULL, &maxsdu);
730 			new_open = B_TRUE;
731 		}
732 	}
733 
734 	mpsize = uiop->uio_resid;
735 	if (mpsize > maxsdu) {
736 		ks_stats.kp_send_too_big.value.ui64++;
737 		error = EMSGSIZE;
738 		goto done;
739 	}
740 
741 	if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
742 		ks_stats.kp_send_alloc_fail.value.ui64++;
743 		error = ENOBUFS;
744 		goto done;
745 	}
746 
747 	mp->b_wptr = mp->b_rptr + mpsize;
748 	error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
749 	if (error != 0) {
750 		ks_stats.kp_send_uiomove_fail.value.ui64++;
751 		goto done;
752 	}
753 
754 	if (ps->ps_type == SOCK_DGRAM) {
755 		mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
756 		if (mp0 == NULL) {
757 			ks_stats.kp_send_no_memory.value.ui64++;
758 			error = ENOBUFS;
759 			goto done;
760 		}
761 		linkb(mp0, mp);
762 		mp = mp0;
763 	}
764 
765 	/*
766 	 * As this is sending datagrams and no promise is made about
767 	 * how or if a packet will be sent/delivered, no effort is to
768 	 * be expended in recovering from a situation where the packet
769 	 * cannot be sent - it is just dropped.
770 	 */
771 	error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
772 	if (error == 0) {
773 		mp = NULL;
774 		ks_stats.kp_send_ok.value.ui64++;
775 	} else {
776 		ks_stats.kp_send_failed.value.ui64++;
777 	}
778 
779 done:
780 
781 	if (new_open) {
782 		ASSERT(mch != ps->ps_mch);
783 		ASSERT(mh != ps->ps_mh);
784 		pfp_close(mh, mch);
785 	}
786 	if (mp != NULL)
787 		freemsg(mp);
788 
789 	return (error);
790 
791 }
792 
793 /*
794  * There's no use of a lock here, or at the bottom of pfp_packet() where
795  * ps_flow_ctrld is set to true, because in a situation where these two
796  * are racing to set the flag one way or the other, the end result is
797  * going to be ultimately determined by the scheduler anyway - which of
798  * the two threads gets the lock first? In such an operational environment,
799  * we've got packets arriving too fast to be delt with so packets are going
800  * to be dropped. Grabbing a lock just makes the drop more expensive.
801  */
802 static void
803 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
804 {
805 	struct pfpsock *ps;
806 
807 	ps = (struct pfpsock *)handle;
808 
809 	mutex_enter(&ps->ps_lock);
810 	ps->ps_flow_ctrld = B_FALSE;
811 	mutex_exit(&ps->ps_lock);
812 }
813 
814 /*
815  * The implementation of this ioctl() handler is intended to function
816  * in the absence of a bind() being made before it is called. Thus the
817  * function calls mac_open() itself to provide a handle
818  * This function is structured like this:
819  * - determine the linkid for the interface being targetted
820  * - open the interface with said linkid
821  * - perform ioctl
822  * - copy results back to caller
823  *
824  * The ioctls that interact with interface flags have been implented below
825  * to assume that the interface is always up and running (IFF_RUNNING) and
826  * to use the state of this socket to determine whether or not the network
827  * interface is in promiscuous mode. Thus an ioctl to get the interface flags
828  * of an interface that has been put in promiscuous mode by another socket
829  * (in the same program or different), will not report that status.
830  */
831 /* ARGSUSED */
832 static int
833 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
834     int32_t *rval, struct cred *cr)
835 {
836 #if defined(_SYSCALL32)
837 	struct timeval32 tival;
838 #else
839 	struct timeval tival;
840 #endif
841 	mac_client_promisc_type_t mtype;
842 	datalink_id_t linkid;
843 	struct lifreq lifreq;
844 	struct ifreq ifreq;
845 	struct pfpsock *ps;
846 	mac_handle_t mh;
847 	timespec_t tv;
848 	int error;
849 
850 	switch (cmd) {
851 	/*
852 	 * ioctls that work on "struct lifreq"
853 	 */
854 	case SIOCSLIFFLAGS :
855 	case SIOCGLIFINDEX :
856 	case SIOCGLIFFLAGS :
857 	case SIOCGLIFMTU :
858 		error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid);
859 		if (error != 0)
860 			return (error);
861 		break;
862 
863 	/*
864 	 * ioctls that work on "struct ifreq".
865 	 * Not all of these have a "struct lifreq" partner, for example
866 	 * SIOCGIFHWADDR, for the simple reason that the logical interface
867 	 * does not have a hardware address.
868 	 */
869 	case SIOCSIFFLAGS :
870 	case SIOCGIFINDEX :
871 	case SIOCGIFFLAGS :
872 	case SIOCGIFMTU :
873 	case SIOCGIFHWADDR :
874 		error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid);
875 		if (error != 0)
876 			return (error);
877 		break;
878 	}
879 
880 	error =  mac_open_by_linkid(linkid, &mh);
881 	if (error != 0)
882 		return (error);
883 
884 	ps = (struct pfpsock *)handle;
885 
886 	switch (cmd) {
887 	case SIOCGLIFINDEX :
888 		lifreq.lifr_index = linkid;
889 		break;
890 
891 	case SIOCGIFINDEX :
892 		ifreq.ifr_index = linkid;
893 		break;
894 
895 	case SIOCGIFFLAGS :
896 		ifreq.ifr_flags = IFF_RUNNING;
897 		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
898 			ifreq.ifr_flags |= IFF_PROMISC;
899 		break;
900 
901 	case SIOCGLIFFLAGS :
902 		lifreq.lifr_flags = IFF_RUNNING;
903 		if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
904 			lifreq.lifr_flags |= IFF_PROMISC;
905 		break;
906 
907 	case SIOCSIFFLAGS :
908 		if (linkid != ps->ps_linkid) {
909 			error = EINVAL;
910 		} else {
911 			if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
912 				mtype = MAC_CLIENT_PROMISC_ALL;
913 			else
914 				mtype = MAC_CLIENT_PROMISC_FILTERED;
915 			error = pfp_set_promisc(ps, mtype);
916 		}
917 		break;
918 
919 	case SIOCSLIFFLAGS :
920 		if (linkid != ps->ps_linkid) {
921 			error = EINVAL;
922 		} else {
923 			if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
924 				mtype = MAC_CLIENT_PROMISC_ALL;
925 			else
926 				mtype = MAC_CLIENT_PROMISC_FILTERED;
927 			error = pfp_set_promisc(ps, mtype);
928 		}
929 		break;
930 
931 	case SIOCGIFMTU :
932 		mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
933 		break;
934 
935 	case SIOCGLIFMTU :
936 		mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
937 		break;
938 
939 	case SIOCGIFHWADDR :
940 		mac_unicast_primary_get(mh, (uint8_t *)ifreq.ifr_addr.sa_data);
941 		ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
942 		break;
943 
944 	case SIOCGSTAMP :
945 		(void) gethrestime(&tv);
946 		tival.tv_sec = (time_t)tv.tv_sec;
947 		tival.tv_usec = tv.tv_nsec / 1000;
948 		error = ddi_copyout(&tival, (void *)arg, sizeof (tival), 0);
949 		break;
950 
951 	default :
952 		break;
953 	}
954 
955 	mac_close(mh);
956 
957 	if (error == 0) {
958 		/*
959 		 * Only the "GET" ioctls need to copy data back to userace.
960 		 */
961 		switch (cmd) {
962 		case SIOCGLIFINDEX :
963 		case SIOCGLIFFLAGS :
964 		case SIOCGLIFMTU :
965 			error = ddi_copyout(&lifreq, (void *)arg,
966 			    sizeof (lifreq), 0);
967 			break;
968 
969 		case SIOCGIFINDEX :
970 		case SIOCGIFFLAGS :
971 		case SIOCGIFMTU :
972 		case SIOCGIFHWADDR :
973 			error = ddi_copyout(&ifreq, (void *)arg,
974 			    sizeof (ifreq), 0);
975 			break;
976 		default :
977 			break;
978 		}
979 	}
980 
981 	return (error);
982 }
983 
984 /*
985  * Closing the socket requires that all open references to network
986  * interfaces be closed.
987  */
988 /* ARGSUSED */
989 static int
990 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
991 {
992 	struct pfpsock *ps = (struct pfpsock *)handle;
993 
994 	if (ps->ps_phd != 0) {
995 		mac_promisc_remove(ps->ps_phd);
996 		ps->ps_phd = 0;
997 	}
998 
999 	if (ps->ps_mch != 0) {
1000 		mac_client_close(ps->ps_mch, 0);
1001 		ps->ps_mch = 0;
1002 	}
1003 
1004 	if (ps->ps_mh != 0) {
1005 		mac_close(ps->ps_mh);
1006 		ps->ps_mh = 0;
1007 	}
1008 
1009 	kmem_free(ps, sizeof (*ps));
1010 
1011 	return (0);
1012 }
1013 
1014 /* ************************************************************************* */
1015 
1016 /*
1017  * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1018  * determine the linkid for the interface name stored in that structure.
1019  * name is used as a buffer so that we can ensure a trailing \0 is appended
1020  * to the name safely.
1021  */
1022 static int
1023 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1024     datalink_id_t *linkidp)
1025 {
1026 	char name[IFNAMSIZ + 1];
1027 	int error;
1028 
1029 	if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), 0) != 0)
1030 		return (EFAULT);
1031 
1032 	(void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1033 
1034 	error = dls_mgmt_get_linkid(name, linkidp);
1035 	if (error != 0)
1036 		error = dls_devnet_macname2linkid(name, linkidp);
1037 
1038 	return (error);
1039 }
1040 
1041 /*
1042  * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1043  * determine the linkid for the interface name stored in that structure.
1044  * name is used as a buffer so that we can ensure a trailing \0 is appended
1045  * to the name safely.
1046  */
1047 static int
1048 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1049     datalink_id_t *linkidp)
1050 {
1051 	char name[LIFNAMSIZ + 1];
1052 	int error;
1053 
1054 	if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), 0) != 0)
1055 		return (EFAULT);
1056 
1057 	(void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1058 
1059 	error = dls_mgmt_get_linkid(name, linkidp);
1060 	if (error != 0)
1061 		error = dls_devnet_macname2linkid(name, linkidp);
1062 
1063 	return (error);
1064 }
1065 
1066 /*
1067  * Although there are several new SOL_PACKET options that can be set and
1068  * are specific to this implementation of PF_PACKET, the current API does
1069  * not support doing a get on them to retrieve accompanying status. Thus
1070  * it is only currently possible to use SOL_PACKET with getsockopt to
1071  * retrieve statistical information. This remains consistant with the
1072  * Linux API at the time of writing.
1073  */
1074 static int
1075 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1076     void *optval, socklen_t *optlenp)
1077 {
1078 	struct pfpsock *ps;
1079 	int error = 0;
1080 
1081 	ps = (struct pfpsock *)handle;
1082 
1083 	switch (option_name) {
1084 	case PACKET_STATISTICS :
1085 		if (*optlenp < sizeof (ps->ps_stats)) {
1086 			error = EINVAL;
1087 			break;
1088 		}
1089 		*optlenp = sizeof (ps->ps_stats);
1090 		bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1091 		break;
1092 	default :
1093 		error = EINVAL;
1094 		break;
1095 	}
1096 
1097 	return (error);
1098 }
1099 
1100 /*
1101  * The SOL_PACKET level for socket options supports three options,
1102  * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1103  * This function is responsible for mapping the two socket options
1104  * that manage multicast membership into the appropriate internal
1105  * function calls to bring the option into effect. Whilst direct
1106  * changes to the multicast membership (ADD/DROP) groups is handled
1107  * by calls directly into the mac module, changes to the promiscuos
1108  * mode are vectored through pfp_set_promisc() so that the logic for
1109  * managing the promiscuous mode is in one place.
1110  */
1111 /* ARGSUSED */
1112 static int
1113 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1114     const void *optval, socklen_t optlen)
1115 {
1116 	struct packet_mreq mreq;
1117 	struct pfpsock *ps;
1118 	int error = 0;
1119 	int opt;
1120 
1121 	ps = (struct pfpsock *)handle;
1122 	if (!ps->ps_bound)
1123 		return (EPROTO);
1124 
1125 	if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1126 	    (option_name == PACKET_DROP_MEMBERSHIP)) {
1127 		if (!ps->ps_bound)
1128 			return (EPROTO);
1129 		bcopy(optval, &mreq, sizeof (mreq));
1130 		if (ps->ps_linkid != mreq.mr_ifindex)
1131 			return (EINVAL);
1132 
1133 		if (mreq.mr_alen !=
1134 		    ((struct sockaddr_ll *)&ps->ps_sock)->sll_halen)
1135 			return (EINVAL);
1136 	}
1137 
1138 	switch (option_name) {
1139 	case PACKET_ADD_MEMBERSHIP :
1140 		switch (mreq.mr_type) {
1141 		case PACKET_MR_MULTICAST :
1142 			error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1143 			break;
1144 
1145 		case PACKET_MR_PROMISC :
1146 			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1147 			break;
1148 
1149 		case PACKET_MR_ALLMULTI :
1150 			error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1151 			break;
1152 		}
1153 		break;
1154 
1155 	case PACKET_DROP_MEMBERSHIP :
1156 		switch (mreq.mr_type) {
1157 		case PACKET_MR_MULTICAST :
1158 			mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1159 			break;
1160 
1161 		case PACKET_MR_PROMISC :
1162 			if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1163 				return (EINVAL);
1164 			error = pfp_set_promisc(ps,
1165 			    MAC_CLIENT_PROMISC_FILTERED);
1166 			break;
1167 
1168 		case PACKET_MR_ALLMULTI :
1169 			if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1170 				return (EINVAL);
1171 			error = pfp_set_promisc(ps,
1172 			    MAC_CLIENT_PROMISC_FILTERED);
1173 			break;
1174 		}
1175 		break;
1176 
1177 	case PACKET_AUXDATA :
1178 		if (optlen == sizeof (int)) {
1179 			opt = *(int *)optval;
1180 			ps->ps_auxdata = (opt != 0);
1181 		} else {
1182 			error = EINVAL;
1183 		}
1184 		break;
1185 	default :
1186 		error = EINVAL;
1187 		break;
1188 	}
1189 
1190 	return (error);
1191 }
1192 
1193 /*
1194  * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1195  * SO_ATTACH_FILTER and SO_DETACH_FILTER. All other setsockopt requests
1196  * that are for SOL_SOCKET are passed back to the socket layer for its
1197  * generic implementation.
1198  *
1199  * Both of these setsockopt values are candidates for being handled by the
1200  * socket layer itself in future, however this requires understanding how
1201  * they would interact with all other sockets.
1202  */
1203 static int
1204 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1205     const void *optval, socklen_t optlen)
1206 {
1207 	struct bpf_program prog;
1208 	struct bpf_insn *fcode;
1209 	struct pfpsock *ps;
1210 	int error = 0;
1211 	int size;
1212 
1213 	ps = (struct pfpsock *)handle;
1214 
1215 	switch (option_name) {
1216 	case SO_ATTACH_FILTER :
1217 #ifdef _LP64
1218 		if (optlen == sizeof (struct bpf_program32)) {
1219 			struct bpf_program32 prog32;
1220 
1221 			bcopy(optval, &prog32, sizeof (prog32));
1222 			prog.bf_len = prog32.bf_len;
1223 			prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1224 		} else
1225 #endif
1226 		if (optlen == sizeof (struct bpf_program)) {
1227 			bcopy(optval, &prog, sizeof (prog));
1228 		} else if (optlen != sizeof (struct bpf_program)) {
1229 			return (EINVAL);
1230 		}
1231 
1232 		size = prog.bf_len * sizeof (*prog.bf_insns);
1233 		fcode = kmem_alloc(size, KM_SLEEP);
1234 		if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1235 			kmem_free(fcode, size);
1236 			return (EFAULT);
1237 		}
1238 
1239 		if (bpf_validate(fcode, (int)prog.bf_len)) {
1240 			rw_enter(&ps->ps_bpflock, RW_WRITER);
1241 			pfp_release_bpf(ps);
1242 			ps->ps_bpf.bf_insns = fcode;
1243 			ps->ps_bpf.bf_len = size;
1244 			rw_exit(&ps->ps_bpflock);
1245 
1246 			return (0);
1247 		}
1248 		kmem_free(fcode, size);
1249 		error = EINVAL;
1250 		break;
1251 
1252 	case SO_DETACH_FILTER :
1253 		pfp_release_bpf(ps);
1254 		break;
1255 	default :
1256 		/*
1257 		 * If sockfs code receives this error in return from the
1258 		 * getsockopt downcall it handles the option locally, if
1259 		 * it can. This implements SO_RCVBUF, etc.
1260 		 */
1261 		error = ENOPROTOOPT;
1262 		break;
1263 	}
1264 
1265 	return (error);
1266 }
1267 
1268 /*
1269  * pfp_open_index is an internal function used to open a MAC device by
1270  * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1271  * because some of the interfaces provided by the mac layer require either
1272  * only the mac_handle_t or both it and mac_handle_t.
1273  *
1274  * Whilst inside the kernel we can access data structures supporting any
1275  * zone, access to interfaces from non-global zones is restricted to those
1276  * interfaces (if any) that are exclusively assigned to a zone.
1277  */
1278 static int
1279 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1280     cred_t *cred)
1281 {
1282 	mac_client_handle_t mch;
1283 	zoneid_t ifzoneid;
1284 	mac_handle_t mh;
1285 	zoneid_t zoneid;
1286 	int error;
1287 
1288 	mh = 0;
1289 	mch = 0;
1290 	error = mac_open_by_linkid(index, &mh);
1291 	if (error != 0)
1292 		goto bad_open;
1293 
1294 	error = mac_client_open(mh, &mch, NULL,
1295 	    MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1296 	if (error != 0)
1297 		goto bad_open;
1298 
1299 	zoneid = crgetzoneid(cred);
1300 	if (zoneid != GLOBAL_ZONEID) {
1301 		mac_perim_handle_t perim;
1302 
1303 		mac_perim_enter_by_mh(mh, &perim);
1304 		error = dls_link_getzid(mac_client_name(mch), &ifzoneid);
1305 		mac_perim_exit(perim);
1306 		if (error != 0)
1307 			goto bad_open;
1308 		if (ifzoneid != zoneid) {
1309 			error = EACCES;
1310 			goto bad_open;
1311 		}
1312 	}
1313 
1314 	*mcip = mch;
1315 	*mhp = mh;
1316 
1317 	return (0);
1318 bad_open:
1319 	if (mch != 0)
1320 		mac_client_close(mch, 0);
1321 	if (mh != 0)
1322 		mac_close(mh);
1323 	return (error);
1324 }
1325 
/*
 * Close a MAC client handle and the MAC handle it was opened on, such as
 * the pair returned by pfp_open_index(). The client handle is closed
 * first, followed by the MAC handle.
 */
static void
pfp_close(mac_handle_t mh, mac_client_handle_t mch)
{
	mac_client_close(mch, 0);
	mac_close(mh);
}
1332 
1333 /*
1334  * The purpose of this function is to provide a single place where we free
1335  * the loaded BPF program and reset all pointers/counters associated with
1336  * it.
1337  */
1338 static void
1339 pfp_release_bpf(struct pfpsock *ps)
1340 {
1341 	if (ps->ps_bpf.bf_len != 0) {
1342 		kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1343 		ps->ps_bpf.bf_len = 0;
1344 		ps->ps_bpf.bf_insns = NULL;
1345 	}
1346 }
1347 
1348 /*
1349  * Set the promiscuous mode of a network interface.
1350  * This function only calls the mac layer when there is a change to the
1351  * status of a network interface's promiscous mode. Tracking of how many
1352  * sockets have the network interface in promiscuous mode, and thus the
1353  * control over the physical device's status, is left to the mac layer.
1354  */
1355 static int
1356 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1357 {
1358 	int error = 0;
1359 	int flags;
1360 
1361 	/*
1362 	 * There are 4 combinations of turnon/ps_promisc.
1363 	 * This if handles 2 (both false, both true) and the if() below
1364 	 * handles the remaining one - when change is required.
1365 	 */
1366 	if (turnon == ps->ps_promisc)
1367 		return (error);
1368 
1369 	if (ps->ps_phd != 0) {
1370 		mac_promisc_remove(ps->ps_phd);
1371 		ps->ps_phd = 0;
1372 
1373 		/*
1374 		 * ps_promisc is set here in case the call to mac_promisc_add
1375 		 * fails: leaving it to indicate that the interface is still
1376 		 * in some sort of promiscuous mode is false.
1377 		 */
1378 		if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1379 			ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1380 			flags = MAC_PROMISC_FLAGS_NO_PHYS;
1381 		} else {
1382 			flags = 0;
1383 		}
1384 		flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1385 	}
1386 
1387 	error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1388 	    &ps->ps_phd, flags);
1389 	if (error == 0)
1390 		ps->ps_promisc = turnon;
1391 
1392 	return (error);
1393 }
1394 
/*
 * This table maps the MAC types in Solaris to the ARPHRD_* values used
 * on Linux. This is used with the SIOCGIFHWADDR ioctl.
 *
 * Each row is { ARPHRD_* value, DL_* value }. The table is terminated by
 * an all-zero row, which pfp_dl_to_arphrd() relies on as its end marker.
 */
static uint_t arphrd_to_dl[][2] = {
	{ ARPHRD_ETHER,		DL_ETHER },
	{ ARPHRD_IEEE80211,	DL_WIFI },
	{ 0,			0 }
};
1404 
1405 static int
1406 pfp_dl_to_arphrd(int dltype)
1407 {
1408 	int i;
1409 
1410 	for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1411 		if (arphrd_to_dl[i][1] == dltype)
1412 			return (arphrd_to_dl[i][0]);
1413 	return (0);
1414 }
1415