xref: /freebsd/sys/netgraph/ng_socket.c (revision c697fb7f)
1 /*
2  * ng_socket.c
3  */
4 
5 /*-
6  * Copyright (c) 1996-1999 Whistle Communications, Inc.
7  * All rights reserved.
8  *
9  * Subject to the following obligations and disclaimer of warranty, use and
10  * redistribution of this software, in source or object code forms, with or
11  * without modifications are expressly permitted by Whistle Communications;
12  * provided, however, that:
13  * 1. Any and all reproductions of the source or object code must include the
14  *    copyright notice above and the following disclaimer of warranties; and
15  * 2. No rights are granted, in any manner or form, to use Whistle
16  *    Communications, Inc. trademarks, including the mark "WHISTLE
17  *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
18  *    such appears in the above copyright notice or in the software.
19  *
20  * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
21  * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
22  * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
23  * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
24  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
25  * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
26  * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
27  * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
28  * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
29  * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
30  * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
31  * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
32  * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
33  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
35  * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
36  * OF SUCH DAMAGE.
37  *
38  * Author: Julian Elischer <julian@freebsd.org>
39  *
40  * $FreeBSD$
41  * $Whistle: ng_socket.c,v 1.28 1999/11/01 09:24:52 julian Exp $
42  */
43 
44 /*
45  * Netgraph socket nodes
46  *
47  * There are two types of netgraph sockets, control and data.
48  * Control sockets have a netgraph node, but data sockets are
49  * parasitic on control sockets, and have no node of their own.
50  */
51 
52 #include <sys/param.h>
53 #include <sys/domain.h>
54 #include <sys/hash.h>
55 #include <sys/kernel.h>
56 #include <sys/linker.h>
57 #include <sys/lock.h>
58 #include <sys/malloc.h>
59 #include <sys/mbuf.h>
60 #include <sys/mutex.h>
61 #include <sys/proc.h>
62 #include <sys/epoch.h>
63 #include <sys/priv.h>
64 #include <sys/protosw.h>
65 #include <sys/queue.h>
66 #include <sys/socket.h>
67 #include <sys/socketvar.h>
68 #include <sys/syscallsubr.h>
69 #include <sys/sysctl.h>
70 
71 #include <net/vnet.h>
72 
73 #include <netgraph/ng_message.h>
74 #include <netgraph/netgraph.h>
75 #include <netgraph/ng_socketvar.h>
76 #include <netgraph/ng_socket.h>
77 
/*
 * Malloc types used by this file.  Unless NG_SEPARATE_MALLOC is defined,
 * both names are plain aliases for M_NETGRAPH.
 */
#ifndef NG_SEPARATE_MALLOC
#define M_NETGRAPH_PATH M_NETGRAPH
#define M_NETGRAPH_SOCK M_NETGRAPH
#else
static MALLOC_DEFINE(M_NETGRAPH_PATH, "netgraph_path", "netgraph path info");
static MALLOC_DEFINE(M_NETGRAPH_SOCK, "netgraph_sock", "netgraph socket info");
#endif
85 
86 /*
87  * It's Ascii-art time!
88  *   +-------------+   +-------------+
89  *   |socket  (ctl)|   |socket (data)|
90  *   +-------------+   +-------------+
91  *          ^                 ^
92  *          |                 |
93  *          v                 v
94  *    +-----------+     +-----------+
95  *    |pcb   (ctl)|     |pcb  (data)|
96  *    +-----------+     +-----------+
97  *          ^                 ^
98  *          |                 |
99  *          v                 v
100  *      +--------------------------+
101  *      |   Socket type private    |
102  *      |       data               |
103  *      +--------------------------+
104  *                   ^
105  *                   |
106  *                   v
107  *           +----------------+
108  *           | struct ng_node |
109  *           +----------------+
110  */
111 
112 /* Netgraph node methods */
113 static ng_constructor_t	ngs_constructor;
114 static ng_rcvmsg_t	ngs_rcvmsg;
115 static ng_shutdown_t	ngs_shutdown;
116 static ng_newhook_t	ngs_newhook;
117 static ng_connect_t	ngs_connect;
118 static ng_findhook_t	ngs_findhook;
119 static ng_rcvdata_t	ngs_rcvdata;
120 static ng_disconnect_t	ngs_disconnect;
121 
122 /* Internal methods */
123 static int	ng_attach_data(struct socket *so);
124 static int	ng_attach_cntl(struct socket *so);
125 static int	ng_attach_common(struct socket *so, int type);
126 static void	ng_detach_common(struct ngpcb *pcbp, int type);
127 static void	ng_socket_free_priv(struct ngsock *priv);
128 static int	ng_connect_data(struct sockaddr *nam, struct ngpcb *pcbp);
129 static int	ng_bind(struct sockaddr *nam, struct ngpcb *pcbp);
130 
131 static int	ngs_mod_event(module_t mod, int event, void *data);
132 static void	ng_socket_item_applied(void *context, int error);
133 
134 /* Netgraph type descriptor */
135 static struct ng_type typestruct = {
136 	.version =	NG_ABI_VERSION,
137 	.name =		NG_SOCKET_NODE_TYPE,
138 	.mod_event =	ngs_mod_event,
139 	.constructor =	ngs_constructor,
140 	.rcvmsg =	ngs_rcvmsg,
141 	.shutdown =	ngs_shutdown,
142 	.newhook =	ngs_newhook,
143 	.connect =	ngs_connect,
144 	.findhook =	ngs_findhook,
145 	.rcvdata =	ngs_rcvdata,
146 	.disconnect =	ngs_disconnect,
147 };
148 NETGRAPH_INIT_ORDERED(socket, &typestruct, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
149 
150 /* Buffer space */
151 static u_long ngpdg_sendspace = 20 * 1024;	/* really max datagram size */
152 SYSCTL_ULONG(_net_graph, OID_AUTO, maxdgram, CTLFLAG_RW,
153     &ngpdg_sendspace , 0, "Maximum outgoing Netgraph datagram size");
154 static u_long ngpdg_recvspace = 20 * 1024;
155 SYSCTL_ULONG(_net_graph, OID_AUTO, recvspace, CTLFLAG_RW,
156     &ngpdg_recvspace , 0, "Maximum space for incoming Netgraph datagrams");
157 
158 /* List of all sockets (for netstat -f netgraph) */
159 static LIST_HEAD(, ngpcb) ngsocklist;
160 
161 static struct mtx	ngsocketlist_mtx;
162 
163 #define sotongpcb(so) ((struct ngpcb *)(so)->so_pcb)
164 
165 /* If getting unexplained errors returned, set this to "kdb_enter("X"); */
166 #ifndef TRAP_ERROR
167 #define TRAP_ERROR
168 #endif
169 
170 struct hookpriv {
171 	LIST_ENTRY(hookpriv)	next;
172 	hook_p			hook;
173 };
174 LIST_HEAD(ngshash, hookpriv);
175 
176 /* Per-node private data */
177 struct ngsock {
178 	struct ng_node	*node;		/* the associated netgraph node */
179 	struct ngpcb	*datasock;	/* optional data socket */
180 	struct ngpcb	*ctlsock;	/* optional control socket */
181 	struct ngshash	*hash;		/* hash for hook names */
182 	u_long		hmask;		/* hash mask */
183 	int	flags;
184 	int	refs;
185 	struct mtx	mtx;		/* mtx to wait on */
186 	int		error;		/* place to store error */
187 };
188 
189 #define	NGS_FLAG_NOLINGER	1	/* close with last hook */
190 
191 /***************************************************************
192 	Control sockets
193 ***************************************************************/
194 
195 static int
196 ngc_attach(struct socket *so, int proto, struct thread *td)
197 {
198 	struct ngpcb *const pcbp = sotongpcb(so);
199 	int error;
200 
201 	error = priv_check(td, PRIV_NETGRAPH_CONTROL);
202 	if (error)
203 		return (error);
204 	if (pcbp != NULL)
205 		return (EISCONN);
206 	return (ng_attach_cntl(so));
207 }
208 
209 static void
210 ngc_detach(struct socket *so)
211 {
212 	struct ngpcb *const pcbp = sotongpcb(so);
213 
214 	KASSERT(pcbp != NULL, ("ngc_detach: pcbp == NULL"));
215 	ng_detach_common(pcbp, NG_CONTROL);
216 }
217 
218 static int
219 ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
220 	 struct mbuf *control, struct thread *td)
221 {
222 	struct ngpcb *const pcbp = sotongpcb(so);
223 	struct ngsock *const priv = NG_NODE_PRIVATE(pcbp->sockdata->node);
224 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
225 	struct ng_mesg *msg;
226 	struct mbuf *m0;
227 	item_p item;
228 	char *path = NULL;
229 	int len, error = 0;
230 	struct ng_apply_info apply;
231 
232 	if (control) {
233 		error = EINVAL;
234 		goto release;
235 	}
236 
237 	/* Require destination as there may be >= 1 hooks on this node. */
238 	if (addr == NULL) {
239 		error = EDESTADDRREQ;
240 		goto release;
241 	}
242 
243 	/*
244 	 * Allocate an expendable buffer for the path, chop off
245 	 * the sockaddr header, and make sure it's NUL terminated.
246 	 */
247 	len = sap->sg_len - 2;
248 	path = malloc(len + 1, M_NETGRAPH_PATH, M_WAITOK);
249 	bcopy(sap->sg_data, path, len);
250 	path[len] = '\0';
251 
252 	/*
253 	 * Move the actual message out of mbufs into a linear buffer.
254 	 * Start by adding up the size of the data. (could use mh_len?)
255 	 */
256 	for (len = 0, m0 = m; m0 != NULL; m0 = m0->m_next)
257 		len += m0->m_len;
258 
259 	/*
260 	 * Move the data into a linear buffer as well.
261 	 * Messages are not delivered in mbufs.
262 	 */
263 	msg = malloc(len + 1, M_NETGRAPH_MSG, M_WAITOK);
264 	m_copydata(m, 0, len, (char *)msg);
265 
266 	if (msg->header.version != NG_VERSION) {
267 		free(msg, M_NETGRAPH_MSG);
268 		error = EINVAL;
269 		goto release;
270 	}
271 
272 	/*
273 	 * Hack alert!
274 	 * We look into the message and if it mkpeers a node of unknown type, we
275 	 * try to load it. We need to do this now, in syscall thread, because if
276 	 * message gets queued and applied later we will get panic.
277 	 */
278 	if (msg->header.typecookie == NGM_GENERIC_COOKIE &&
279 	    msg->header.cmd == NGM_MKPEER) {
280 		struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data;
281 
282 		if (ng_findtype(mkp->type) == NULL) {
283 			char filename[NG_TYPESIZ + 3];
284 			int fileid;
285 
286 			/* Not found, try to load it as a loadable module. */
287 			snprintf(filename, sizeof(filename), "ng_%s",
288 			    mkp->type);
289 			error = kern_kldload(curthread, filename, &fileid);
290 			if (error != 0) {
291 				free(msg, M_NETGRAPH_MSG);
292 				goto release;
293 			}
294 
295 			/* See if type has been loaded successfully. */
296 			if (ng_findtype(mkp->type) == NULL) {
297 				free(msg, M_NETGRAPH_MSG);
298 				(void)kern_kldunload(curthread, fileid,
299 				    LINKER_UNLOAD_NORMAL);
300 				error =  ENXIO;
301 				goto release;
302 			}
303 		}
304 	}
305 
306 	item = ng_package_msg(msg, NG_WAITOK);
307 	if ((error = ng_address_path((pcbp->sockdata->node), item, path, 0))
308 	    != 0) {
309 #ifdef TRACE_MESSAGES
310 		printf("ng_address_path: errx=%d\n", error);
311 #endif
312 		goto release;
313 	}
314 
315 #ifdef TRACE_MESSAGES
316 	printf("[%x]:<---------[socket]: c=<%d>cmd=%x(%s) f=%x #%d (%s)\n",
317 		item->el_dest->nd_ID,
318 		msg->header.typecookie,
319 		msg->header.cmd,
320 		msg->header.cmdstr,
321 		msg->header.flags,
322 		msg->header.token,
323 		item->el_dest->nd_type->name);
324 #endif
325 	SAVE_LINE(item);
326 	/*
327 	 * We do not want to return from syscall until the item
328 	 * is processed by destination node. We register callback
329 	 * on the item, which will update priv->error when item
330 	 * was applied.
331 	 * If ng_snd_item() has queued item, we sleep until
332 	 * callback wakes us up.
333 	 */
334 	bzero(&apply, sizeof(apply));
335 	apply.apply = ng_socket_item_applied;
336 	apply.context = priv;
337 	item->apply = &apply;
338 	priv->error = -1;
339 
340 	error = ng_snd_item(item, 0);
341 
342 	mtx_lock(&priv->mtx);
343 	if (priv->error == -1)
344 		msleep(priv, &priv->mtx, 0, "ngsock", 0);
345 	mtx_unlock(&priv->mtx);
346 	KASSERT(priv->error != -1,
347 	    ("ng_socket: priv->error wasn't updated"));
348 	error = priv->error;
349 
350 release:
351 	if (path != NULL)
352 		free(path, M_NETGRAPH_PATH);
353 	if (control != NULL)
354 		m_freem(control);
355 	if (m != NULL)
356 		m_freem(m);
357 	return (error);
358 }
359 
360 static int
361 ngc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
362 {
363 	struct ngpcb *const pcbp = sotongpcb(so);
364 
365 	if (pcbp == NULL)
366 		return (EINVAL);
367 	return (ng_bind(nam, pcbp));
368 }
369 
370 static int
371 ngc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
372 {
373 	/*
374 	 * At this time refuse to do this.. it used to
375 	 * do something but it was undocumented and not used.
376 	 */
377 	printf("program tried to connect control socket to remote node\n");
378 	return (EINVAL);
379 }
380 
381 /***************************************************************
382 	Data sockets
383 ***************************************************************/
384 
385 static int
386 ngd_attach(struct socket *so, int proto, struct thread *td)
387 {
388 	struct ngpcb *const pcbp = sotongpcb(so);
389 
390 	if (pcbp != NULL)
391 		return (EISCONN);
392 	return (ng_attach_data(so));
393 }
394 
395 static void
396 ngd_detach(struct socket *so)
397 {
398 	struct ngpcb *const pcbp = sotongpcb(so);
399 
400 	KASSERT(pcbp != NULL, ("ngd_detach: pcbp == NULL"));
401 	ng_detach_common(pcbp, NG_DATA);
402 }
403 
404 static int
405 ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
406 	 struct mbuf *control, struct thread *td)
407 {
408 	struct epoch_tracker et;
409 	struct ngpcb *const pcbp = sotongpcb(so);
410 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
411 	int	len, error;
412 	hook_p  hook = NULL;
413 	item_p	item;
414 	char	hookname[NG_HOOKSIZ];
415 
416 	if ((pcbp == NULL) || (control != NULL)) {
417 		error = EINVAL;
418 		goto release;
419 	}
420 	if (pcbp->sockdata == NULL) {
421 		error = ENOTCONN;
422 		goto release;
423 	}
424 
425 	if (sap == NULL)
426 		len = 0;		/* Make compiler happy. */
427 	else
428 		len = sap->sg_len - 2;
429 
430 	/*
431 	 * If the user used any of these ways to not specify an address
432 	 * then handle specially.
433 	 */
434 	if ((sap == NULL) || (len <= 0) || (*sap->sg_data == '\0')) {
435 		if (NG_NODE_NUMHOOKS(pcbp->sockdata->node) != 1) {
436 			error = EDESTADDRREQ;
437 			goto release;
438 		}
439 		/*
440 		 * If exactly one hook exists, just use it.
441 		 * Special case to allow write(2) to work on an ng_socket.
442 		 */
443 		hook = LIST_FIRST(&pcbp->sockdata->node->nd_hooks);
444 	} else {
445 		if (len >= NG_HOOKSIZ) {
446 			error = EINVAL;
447 			goto release;
448 		}
449 
450 		/*
451 		 * chop off the sockaddr header, and make sure it's NUL
452 		 * terminated
453 		 */
454 		bcopy(sap->sg_data, hookname, len);
455 		hookname[len] = '\0';
456 
457 		/* Find the correct hook from 'hookname' */
458 		hook = ng_findhook(pcbp->sockdata->node, hookname);
459 		if (hook == NULL) {
460 			error = EHOSTUNREACH;
461 			goto release;
462 		}
463 	}
464 
465 	/* Send data. */
466 	item = ng_package_data(m, NG_WAITOK);
467 	m = NULL;
468 	NET_EPOCH_ENTER(et);
469 	NG_FWD_ITEM_HOOK(error, item, hook);
470 	NET_EPOCH_EXIT(et);
471 
472 release:
473 	if (control != NULL)
474 		m_freem(control);
475 	if (m != NULL)
476 		m_freem(m);
477 	return (error);
478 }
479 
480 static int
481 ngd_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
482 {
483 	struct ngpcb *const pcbp = sotongpcb(so);
484 
485 	if (pcbp == NULL)
486 		return (EINVAL);
487 	return (ng_connect_data(nam, pcbp));
488 }
489 
490 /*
491  * Used for both data and control sockets
492  */
493 static int
494 ng_getsockaddr(struct socket *so, struct sockaddr **addr)
495 {
496 	struct ngpcb *pcbp;
497 	struct sockaddr_ng *sg;
498 	int sg_len;
499 	int error = 0;
500 
501 	pcbp = sotongpcb(so);
502 	if ((pcbp == NULL) || (pcbp->sockdata == NULL))
503 		/* XXXGL: can this still happen? */
504 		return (EINVAL);
505 
506 	sg_len = sizeof(struct sockaddr_ng) + NG_NODESIZ -
507 	    sizeof(sg->sg_data);
508 	sg = malloc(sg_len, M_SONAME, M_WAITOK | M_ZERO);
509 
510 	mtx_lock(&pcbp->sockdata->mtx);
511 	if (pcbp->sockdata->node != NULL) {
512 		node_p node = pcbp->sockdata->node;
513 
514 		if (NG_NODE_HAS_NAME(node))
515 			bcopy(NG_NODE_NAME(node), sg->sg_data,
516 			    strlen(NG_NODE_NAME(node)));
517 		mtx_unlock(&pcbp->sockdata->mtx);
518 
519 		sg->sg_len = sg_len;
520 		sg->sg_family = AF_NETGRAPH;
521 		*addr = (struct sockaddr *)sg;
522 	} else {
523 		mtx_unlock(&pcbp->sockdata->mtx);
524 		free(sg, M_SONAME);
525 		error = EINVAL;
526 	}
527 
528 	return (error);
529 }
530 
531 /*
532  * Attach a socket to it's protocol specific partner.
533  * For a control socket, actually create a netgraph node and attach
534  * to it as well.
535  */
536 
537 static int
538 ng_attach_cntl(struct socket *so)
539 {
540 	struct ngsock *priv;
541 	struct ngpcb *pcbp;
542 	node_p node;
543 	int error;
544 
545 	/* Setup protocol control block */
546 	if ((error = ng_attach_common(so, NG_CONTROL)) != 0)
547 		return (error);
548 	pcbp = sotongpcb(so);
549 
550 	/* Make the generic node components */
551 	if ((error = ng_make_node_common(&typestruct, &node)) != 0) {
552 		ng_detach_common(pcbp, NG_CONTROL);
553 		return (error);
554 	}
555 
556 	/*
557 	 * Allocate node private info and hash. We start
558 	 * with 16 hash entries, however we may grow later
559 	 * in ngs_newhook(). We can't predict how much hooks
560 	 * does this node plan to have.
561 	 */
562 	priv = malloc(sizeof(*priv), M_NETGRAPH_SOCK, M_WAITOK | M_ZERO);
563 	priv->hash = hashinit(16, M_NETGRAPH_SOCK, &priv->hmask);
564 
565 	/* Initialize mutex. */
566 	mtx_init(&priv->mtx, "ng_socket", NULL, MTX_DEF);
567 
568 	/* Link the pcb the private data. */
569 	priv->ctlsock = pcbp;
570 	pcbp->sockdata = priv;
571 	priv->refs++;
572 	priv->node = node;
573 	pcbp->node_id = node->nd_ID;	/* hint for netstat(1) */
574 
575 	/* Link the node and the private data. */
576 	NG_NODE_SET_PRIVATE(priv->node, priv);
577 	NG_NODE_REF(priv->node);
578 	priv->refs++;
579 
580 	return (0);
581 }
582 
583 static int
584 ng_attach_data(struct socket *so)
585 {
586 	return (ng_attach_common(so, NG_DATA));
587 }
588 
589 /*
590  * Set up a socket protocol control block.
591  * This code is shared between control and data sockets.
592  */
593 static int
594 ng_attach_common(struct socket *so, int type)
595 {
596 	struct ngpcb *pcbp;
597 	int error;
598 
599 	/* Standard socket setup stuff. */
600 	error = soreserve(so, ngpdg_sendspace, ngpdg_recvspace);
601 	if (error)
602 		return (error);
603 
604 	/* Allocate the pcb. */
605 	pcbp = malloc(sizeof(struct ngpcb), M_PCB, M_WAITOK | M_ZERO);
606 	pcbp->type = type;
607 
608 	/* Link the pcb and the socket. */
609 	so->so_pcb = (caddr_t)pcbp;
610 	pcbp->ng_socket = so;
611 
612 	/* Add the socket to linked list */
613 	mtx_lock(&ngsocketlist_mtx);
614 	LIST_INSERT_HEAD(&ngsocklist, pcbp, socks);
615 	mtx_unlock(&ngsocketlist_mtx);
616 	return (0);
617 }
618 
619 /*
620  * Disassociate the socket from it's protocol specific
621  * partner. If it's attached to a node's private data structure,
622  * then unlink from that too. If we were the last socket attached to it,
623  * then shut down the entire node. Shared code for control and data sockets.
624  */
625 static void
626 ng_detach_common(struct ngpcb *pcbp, int which)
627 {
628 	struct ngsock *priv = pcbp->sockdata;
629 
630 	if (priv != NULL) {
631 		mtx_lock(&priv->mtx);
632 
633 		switch (which) {
634 		case NG_CONTROL:
635 			priv->ctlsock = NULL;
636 			break;
637 		case NG_DATA:
638 			priv->datasock = NULL;
639 			break;
640 		default:
641 			panic("%s", __func__);
642 		}
643 		pcbp->sockdata = NULL;
644 		pcbp->node_id = 0;
645 
646 		ng_socket_free_priv(priv);
647 	}
648 
649 	pcbp->ng_socket->so_pcb = NULL;
650 	mtx_lock(&ngsocketlist_mtx);
651 	LIST_REMOVE(pcbp, socks);
652 	mtx_unlock(&ngsocketlist_mtx);
653 	free(pcbp, M_PCB);
654 }
655 
656 /*
657  * Remove a reference from node private data.
658  */
659 static void
660 ng_socket_free_priv(struct ngsock *priv)
661 {
662 	mtx_assert(&priv->mtx, MA_OWNED);
663 
664 	priv->refs--;
665 
666 	if (priv->refs == 0) {
667 		mtx_destroy(&priv->mtx);
668 		hashdestroy(priv->hash, M_NETGRAPH_SOCK, priv->hmask);
669 		free(priv, M_NETGRAPH_SOCK);
670 		return;
671 	}
672 
673 	if ((priv->refs == 1) && (priv->node != NULL)) {
674 		node_p node = priv->node;
675 
676 		priv->node = NULL;
677 		mtx_unlock(&priv->mtx);
678 		NG_NODE_UNREF(node);
679 		ng_rmnode_self(node);
680 	} else
681 		mtx_unlock(&priv->mtx);
682 }
683 
684 /*
685  * Connect the data socket to a named control socket node.
686  */
687 static int
688 ng_connect_data(struct sockaddr *nam, struct ngpcb *pcbp)
689 {
690 	struct sockaddr_ng *sap;
691 	node_p farnode;
692 	struct ngsock *priv;
693 	int error;
694 	item_p item;
695 
696 	/* If we are already connected, don't do it again. */
697 	if (pcbp->sockdata != NULL)
698 		return (EISCONN);
699 
700 	/*
701 	 * Find the target (victim) and check it doesn't already have
702 	 * a data socket. Also check it is a 'socket' type node.
703 	 * Use ng_package_data() and ng_address_path() to do this.
704 	 */
705 
706 	sap = (struct sockaddr_ng *) nam;
707 	/* The item will hold the node reference. */
708 	item = ng_package_data(NULL, NG_WAITOK);
709 
710 	if ((error = ng_address_path(NULL, item,  sap->sg_data, 0)))
711 		return (error); /* item is freed on failure */
712 
713 	/*
714 	 * Extract node from item and free item. Remember we now have
715 	 * a reference on the node. The item holds it for us.
716 	 * when we free the item we release the reference.
717 	 */
718 	farnode = item->el_dest; /* shortcut */
719 	if (strcmp(farnode->nd_type->name, NG_SOCKET_NODE_TYPE) != 0) {
720 		NG_FREE_ITEM(item); /* drop the reference to the node */
721 		return (EINVAL);
722 	}
723 	priv = NG_NODE_PRIVATE(farnode);
724 	if (priv->datasock != NULL) {
725 		NG_FREE_ITEM(item);	/* drop the reference to the node */
726 		return (EADDRINUSE);
727 	}
728 
729 	/*
730 	 * Link the PCB and the private data struct. and note the extra
731 	 * reference. Drop the extra reference on the node.
732 	 */
733 	mtx_lock(&priv->mtx);
734 	priv->datasock = pcbp;
735 	pcbp->sockdata = priv;
736 	pcbp->node_id = priv->node->nd_ID;	/* hint for netstat(1) */
737 	priv->refs++;
738 	mtx_unlock(&priv->mtx);
739 	NG_FREE_ITEM(item);	/* drop the reference to the node */
740 	return (0);
741 }
742 
743 /*
744  * Binding a socket means giving the corresponding node a name
745  */
746 static int
747 ng_bind(struct sockaddr *nam, struct ngpcb *pcbp)
748 {
749 	struct ngsock *const priv = pcbp->sockdata;
750 	struct sockaddr_ng *const sap = (struct sockaddr_ng *) nam;
751 
752 	if (priv == NULL) {
753 		TRAP_ERROR;
754 		return (EINVAL);
755 	}
756 	if ((sap->sg_len < 4) || (sap->sg_len > (NG_NODESIZ + 2)) ||
757 	    (sap->sg_data[0] == '\0') ||
758 	    (sap->sg_data[sap->sg_len - 3] != '\0')) {
759 		TRAP_ERROR;
760 		return (EINVAL);
761 	}
762 	return (ng_name_node(priv->node, sap->sg_data));
763 }
764 
765 /***************************************************************
766 	Netgraph node
767 ***************************************************************/
768 
769 /*
770  * You can only create new nodes from the socket end of things.
771  */
772 static int
773 ngs_constructor(node_p nodep)
774 {
775 	return (EINVAL);
776 }
777 
778 static void
779 ngs_rehash(node_p node)
780 {
781 	struct ngsock *priv = NG_NODE_PRIVATE(node);
782 	struct ngshash *new;
783 	struct hookpriv *hp;
784 	hook_p hook;
785 	uint32_t h;
786 	u_long hmask;
787 
788 	new = hashinit_flags((priv->hmask + 1) * 2, M_NETGRAPH_SOCK, &hmask,
789 	    HASH_NOWAIT);
790 	if (new == NULL)
791 		return;
792 
793 	LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) {
794 		hp = NG_HOOK_PRIVATE(hook);
795 #ifdef INVARIANTS
796 		LIST_REMOVE(hp, next);
797 #endif
798 		h = hash32_str(NG_HOOK_NAME(hook), HASHINIT) & hmask;
799 		LIST_INSERT_HEAD(&new[h], hp, next);
800 	}
801 
802 	hashdestroy(priv->hash, M_NETGRAPH_SOCK, priv->hmask);
803 	priv->hash = new;
804 	priv->hmask = hmask;
805 }
806 
807 /*
808  * We allow any hook to be connected to the node.
809  * There is no per-hook private information though.
810  */
811 static int
812 ngs_newhook(node_p node, hook_p hook, const char *name)
813 {
814 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
815 	struct hookpriv *hp;
816 	uint32_t h;
817 
818 	hp = malloc(sizeof(*hp), M_NETGRAPH_SOCK, M_NOWAIT);
819 	if (hp == NULL)
820 		return (ENOMEM);
821 	if (node->nd_numhooks * 2 > priv->hmask)
822 		ngs_rehash(node);
823 	hp->hook = hook;
824 	h = hash32_str(name, HASHINIT) & priv->hmask;
825 	LIST_INSERT_HEAD(&priv->hash[h], hp, next);
826 	NG_HOOK_SET_PRIVATE(hook, hp);
827 
828 	return (0);
829 }
830 
831 /*
832  * If only one hook, allow read(2) and write(2) to work.
833  */
834 static int
835 ngs_connect(hook_p hook)
836 {
837 	node_p node = NG_HOOK_NODE(hook);
838 	struct ngsock *priv = NG_NODE_PRIVATE(node);
839 
840 	if ((priv->datasock) && (priv->datasock->ng_socket)) {
841 		if (NG_NODE_NUMHOOKS(node) == 1)
842 			priv->datasock->ng_socket->so_state |= SS_ISCONNECTED;
843 		else
844 			priv->datasock->ng_socket->so_state &= ~SS_ISCONNECTED;
845 	}
846 	return (0);
847 }
848 
849 /* Look up hook by name */
850 static hook_p
851 ngs_findhook(node_p node, const char *name)
852 {
853 	struct ngsock *priv = NG_NODE_PRIVATE(node);
854 	struct hookpriv *hp;
855 	uint32_t h;
856 
857 	/*
858 	 * Microoptimisation for an ng_socket with
859 	 * a single hook, which is a common case.
860 	 */
861 	if (node->nd_numhooks == 1) {
862 		hook_p hook;
863 
864 		hook = LIST_FIRST(&node->nd_hooks);
865 
866 		if (strcmp(NG_HOOK_NAME(hook), name) == 0)
867 			return (hook);
868 		else
869 			return (NULL);
870 	}
871 
872 	h = hash32_str(name, HASHINIT) & priv->hmask;
873 
874 	LIST_FOREACH(hp, &priv->hash[h], next)
875 		if (strcmp(NG_HOOK_NAME(hp->hook), name) == 0)
876 			return (hp->hook);
877 
878 	return (NULL);
879 }
880 
881 /*
882  * Incoming messages get passed up to the control socket.
883  * Unless they are for us specifically (socket_type)
884  */
885 static int
886 ngs_rcvmsg(node_p node, item_p item, hook_p lasthook)
887 {
888 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
889 	struct ngpcb *pcbp;
890 	struct socket *so;
891 	struct sockaddr_ng addr;
892 	struct ng_mesg *msg;
893 	struct mbuf *m;
894 	ng_ID_t	retaddr = NGI_RETADDR(item);
895 	int addrlen;
896 	int error = 0;
897 
898 	NGI_GET_MSG(item, msg);
899 	NG_FREE_ITEM(item);
900 
901 	/*
902 	 * Grab priv->mtx here to prevent destroying of control socket
903 	 * after checking that priv->ctlsock is not NULL.
904 	 */
905 	mtx_lock(&priv->mtx);
906 	pcbp = priv->ctlsock;
907 
908 	/*
909 	 * Only allow mesgs to be passed if we have the control socket.
910 	 * Data sockets can only support the generic messages.
911 	 */
912 	if (pcbp == NULL) {
913 		mtx_unlock(&priv->mtx);
914 		TRAP_ERROR;
915 		NG_FREE_MSG(msg);
916 		return (EINVAL);
917 	}
918 	so = pcbp->ng_socket;
919 	SOCKBUF_LOCK(&so->so_rcv);
920 
921 	/* As long as the race is handled, priv->mtx may be unlocked now. */
922 	mtx_unlock(&priv->mtx);
923 
924 #ifdef TRACE_MESSAGES
925 	printf("[%x]:---------->[socket]: c=<%d>cmd=%x(%s) f=%x #%d\n",
926 		retaddr,
927 		msg->header.typecookie,
928 		msg->header.cmd,
929 		msg->header.cmdstr,
930 		msg->header.flags,
931 		msg->header.token);
932 #endif
933 
934 	if (msg->header.typecookie == NGM_SOCKET_COOKIE) {
935 		switch (msg->header.cmd) {
936 		case NGM_SOCK_CMD_NOLINGER:
937 			priv->flags |= NGS_FLAG_NOLINGER;
938 			break;
939 		case NGM_SOCK_CMD_LINGER:
940 			priv->flags &= ~NGS_FLAG_NOLINGER;
941 			break;
942 		default:
943 			error = EINVAL;		/* unknown command */
944 		}
945 		SOCKBUF_UNLOCK(&so->so_rcv);
946 
947 		/* Free the message and return. */
948 		NG_FREE_MSG(msg);
949 		return (error);
950 	}
951 
952 	/* Get the return address into a sockaddr. */
953 	bzero(&addr, sizeof(addr));
954 	addr.sg_len = sizeof(addr);
955 	addr.sg_family = AF_NETGRAPH;
956 	addrlen = snprintf((char *)&addr.sg_data, sizeof(addr.sg_data),
957 	    "[%x]:", retaddr);
958 	if (addrlen < 0 || addrlen > sizeof(addr.sg_data)) {
959 		SOCKBUF_UNLOCK(&so->so_rcv);
960 		printf("%s: snprintf([%x]) failed - %d\n", __func__, retaddr,
961 		    addrlen);
962 		NG_FREE_MSG(msg);
963 		return (EINVAL);
964 	}
965 
966 	/* Copy the message itself into an mbuf chain. */
967 	m = m_devget((caddr_t)msg, sizeof(struct ng_mesg) + msg->header.arglen,
968 	    0, NULL, NULL);
969 
970 	/*
971 	 * Here we free the message. We need to do that
972 	 * regardless of whether we got mbufs.
973 	 */
974 	NG_FREE_MSG(msg);
975 
976 	if (m == NULL) {
977 		SOCKBUF_UNLOCK(&so->so_rcv);
978 		TRAP_ERROR;
979 		return (ENOBUFS);
980 	}
981 
982 	/* Send it up to the socket. */
983 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&addr, m,
984 	    NULL) == 0) {
985 		SOCKBUF_UNLOCK(&so->so_rcv);
986 		TRAP_ERROR;
987 		m_freem(m);
988 		return (ENOBUFS);
989 	}
990 	sorwakeup_locked(so);
991 
992 	return (error);
993 }
994 
995 /*
996  * Receive data on a hook
997  */
998 static int
999 ngs_rcvdata(hook_p hook, item_p item)
1000 {
1001 	struct ngsock *const priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
1002 	struct ngpcb *const pcbp = priv->datasock;
1003 	struct socket *so;
1004 	struct sockaddr_ng *addr;
1005 	char *addrbuf[NG_HOOKSIZ + 4];
1006 	int addrlen;
1007 	struct mbuf *m;
1008 
1009 	NGI_GET_M(item, m);
1010 	NG_FREE_ITEM(item);
1011 
1012 	/* If there is no data socket, black-hole it. */
1013 	if (pcbp == NULL) {
1014 		NG_FREE_M(m);
1015 		return (0);
1016 	}
1017 	so = pcbp->ng_socket;
1018 
1019 	/* Get the return address into a sockaddr. */
1020 	addrlen = strlen(NG_HOOK_NAME(hook));	/* <= NG_HOOKSIZ - 1 */
1021 	addr = (struct sockaddr_ng *) addrbuf;
1022 	addr->sg_len = addrlen + 3;
1023 	addr->sg_family = AF_NETGRAPH;
1024 	bcopy(NG_HOOK_NAME(hook), addr->sg_data, addrlen);
1025 	addr->sg_data[addrlen] = '\0';
1026 
1027 	/* Try to tell the socket which hook it came in on. */
1028 	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)addr, m, NULL) == 0) {
1029 		m_freem(m);
1030 		TRAP_ERROR;
1031 		return (ENOBUFS);
1032 	}
1033 	sorwakeup(so);
1034 	return (0);
1035 }
1036 
1037 /*
1038  * Hook disconnection
1039  *
1040  * For this type, removal of the last link destroys the node
1041  * if the NOLINGER flag is set.
1042  */
1043 static int
1044 ngs_disconnect(hook_p hook)
1045 {
1046 	node_p node = NG_HOOK_NODE(hook);
1047 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
1048 	struct hookpriv *hp = NG_HOOK_PRIVATE(hook);
1049 
1050 	LIST_REMOVE(hp, next);
1051 	free(hp, M_NETGRAPH_SOCK);
1052 
1053 	if ((priv->datasock) && (priv->datasock->ng_socket)) {
1054 		if (NG_NODE_NUMHOOKS(node) == 1)
1055 			priv->datasock->ng_socket->so_state |= SS_ISCONNECTED;
1056 		else
1057 			priv->datasock->ng_socket->so_state &= ~SS_ISCONNECTED;
1058 	}
1059 
1060 	if ((priv->flags & NGS_FLAG_NOLINGER) &&
1061 	    (NG_NODE_NUMHOOKS(node) == 0) && (NG_NODE_IS_VALID(node)))
1062 		ng_rmnode_self(node);
1063 
1064 	return (0);
1065 }
1066 
1067 /*
1068  * Do local shutdown processing.
1069  * In this case, that involves making sure the socket
1070  * knows we should be shutting down.
1071  */
1072 static int
1073 ngs_shutdown(node_p node)
1074 {
1075 	struct ngsock *const priv = NG_NODE_PRIVATE(node);
1076 	struct ngpcb *dpcbp, *pcbp;
1077 
1078 	mtx_lock(&priv->mtx);
1079 	dpcbp = priv->datasock;
1080 	pcbp = priv->ctlsock;
1081 
1082 	if (dpcbp != NULL)
1083 		soisdisconnected(dpcbp->ng_socket);
1084 
1085 	if (pcbp != NULL)
1086 		soisdisconnected(pcbp->ng_socket);
1087 
1088 	priv->node = NULL;
1089 	NG_NODE_SET_PRIVATE(node, NULL);
1090 	ng_socket_free_priv(priv);
1091 
1092 	NG_NODE_UNREF(node);
1093 	return (0);
1094 }
1095 
1096 static void
1097 ng_socket_item_applied(void *context, int error)
1098 {
1099 	struct ngsock *const priv = (struct ngsock *)context;
1100 
1101 	mtx_lock(&priv->mtx);
1102 	priv->error = error;
1103 	wakeup(priv);
1104 	mtx_unlock(&priv->mtx);
1105 
1106 }
1107 
/* Disconnect handler shared by both socket types: a successful no-op. */
static int
dummy_disconnect(struct socket *so)
{
	const int error = 0;

	return (error);
}
1113 /*
1114  * Control and data socket type descriptors
1115  *
1116  * XXXRW: Perhaps _close should do something?
1117  */
1118 
1119 static struct pr_usrreqs ngc_usrreqs = {
1120 	.pru_abort =		NULL,
1121 	.pru_attach =		ngc_attach,
1122 	.pru_bind =		ngc_bind,
1123 	.pru_connect =		ngc_connect,
1124 	.pru_detach =		ngc_detach,
1125 	.pru_disconnect =	dummy_disconnect,
1126 	.pru_peeraddr =		NULL,
1127 	.pru_send =		ngc_send,
1128 	.pru_shutdown =		NULL,
1129 	.pru_sockaddr =		ng_getsockaddr,
1130 	.pru_close =		NULL,
1131 };
1132 
1133 static struct pr_usrreqs ngd_usrreqs = {
1134 	.pru_abort =		NULL,
1135 	.pru_attach =		ngd_attach,
1136 	.pru_bind =		NULL,
1137 	.pru_connect =		ngd_connect,
1138 	.pru_detach =		ngd_detach,
1139 	.pru_disconnect =	dummy_disconnect,
1140 	.pru_peeraddr =		NULL,
1141 	.pru_send =		ngd_send,
1142 	.pru_shutdown =		NULL,
1143 	.pru_sockaddr =		ng_getsockaddr,
1144 	.pru_close =		NULL,
1145 };
1146 
1147 /*
1148  * Definitions of protocols supported in the NETGRAPH domain.
1149  */
1150 
1151 extern struct domain ngdomain;		/* stop compiler warnings */
1152 
1153 static struct protosw ngsw[] = {
1154 {
1155 	.pr_type =		SOCK_DGRAM,
1156 	.pr_domain =		&ngdomain,
1157 	.pr_protocol =		NG_CONTROL,
1158 	.pr_flags =		PR_ATOMIC | PR_ADDR /* | PR_RIGHTS */,
1159 	.pr_usrreqs =		&ngc_usrreqs
1160 },
1161 {
1162 	.pr_type =		SOCK_DGRAM,
1163 	.pr_domain =		&ngdomain,
1164 	.pr_protocol =		NG_DATA,
1165 	.pr_flags =		PR_ATOMIC | PR_ADDR,
1166 	.pr_usrreqs =		&ngd_usrreqs
1167 }
1168 };
1169 
1170 struct domain ngdomain = {
1171 	.dom_family =		AF_NETGRAPH,
1172 	.dom_name =		"netgraph",
1173 	.dom_protosw =		ngsw,
1174 	.dom_protoswNPROTOSW =	&ngsw[nitems(ngsw)]
1175 };
1176 
1177 /*
1178  * Handle loading and unloading for this node type.
1179  * This is to handle auxiliary linkages (e.g protocol domain addition).
1180  */
1181 static int
1182 ngs_mod_event(module_t mod, int event, void *data)
1183 {
1184 	int error = 0;
1185 
1186 	switch (event) {
1187 	case MOD_LOAD:
1188 		mtx_init(&ngsocketlist_mtx, "ng_socketlist", NULL, MTX_DEF);
1189 		break;
1190 	case MOD_UNLOAD:
1191 		/* Ensure there are no open netgraph sockets. */
1192 		if (!LIST_EMPTY(&ngsocklist)) {
1193 			error = EBUSY;
1194 			break;
1195 		}
1196 #ifdef NOTYET
1197 		/* Unregister protocol domain XXX can't do this yet.. */
1198 #endif
1199 		error = EBUSY;
1200 		break;
1201 	default:
1202 		error = EOPNOTSUPP;
1203 		break;
1204 	}
1205 	return (error);
1206 }
1207 
1208 VNET_DOMAIN_SET(ng);
1209 
1210 SYSCTL_INT(_net_graph, OID_AUTO, family, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, AF_NETGRAPH, "");
1211 static SYSCTL_NODE(_net_graph, OID_AUTO, data, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1212     "DATA");
1213 SYSCTL_INT(_net_graph_data, OID_AUTO, proto, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_DATA, "");
1214 static SYSCTL_NODE(_net_graph, OID_AUTO, control, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1215     "CONTROL");
1216 SYSCTL_INT(_net_graph_control, OID_AUTO, proto, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, NG_CONTROL, "");
1217 
1218